/*
 * File: Compsys-2021-Assignments/A0/file.c
 * (141 lines, 3.1 KiB, C)
 */

#include <stdio.h> // fprintf, stdout, stderr.
#include <stdlib.h> // exit, EXIT_FAILURE, EXIT_SUCCESS.
#include <string.h> // strerror.
#include <errno.h> // errno.
#include <stdbool.h>
/*
 * Categories a file can be sorted into.
 * NOTE(review): nothing in the visible code uses this enum; the constants
 * presumably mirror the labels printed by main -- confirm before relying on it.
 */
enum FileType {
    ASCII, /* 0 */
    ISO,   /* 1 */
    UTF,   /* 2 */
    data   /* 3 */
};
/*
 * Report whether the whole stream consists of structurally valid UTF-8.
 *
 * The stream is rewound first and then read byte by byte until end of file.
 *
 * f: an open, readable, seekable stream.
 *
 * Returns true when every character is either a single byte in 0x01..0x7F or
 * a lead byte (110xxxxx, 1110xxxx, 11110xxx) followed by exactly one, two or
 * three continuation bytes (10xxxxxx). Returns false on a NUL byte, on a
 * continuation byte where a lead byte is expected, on a lead byte announcing
 * more than four bytes, and on a sequence that is cut short or contains a
 * non-continuation byte. An empty stream yields true.
 *
 * Only the byte structure is checked; overlong encodings and surrogate code
 * points are not rejected.
 */
bool is_utf8(FILE* f) {
    rewind(f);
    unsigned char byte;
    while (fread(&byte, 1, 1, f) == 1) {
        int char_length;
        if (byte == 0) {
            // NUL never occurs in text files.
            return false;
        } else if ((byte & 0x80) == 0x00) {
            char_length = 1; // 0xxxxxxx
        } else if ((byte & 0xE0) == 0xC0) {
            char_length = 2; // 110xxxxx
        } else if ((byte & 0xF0) == 0xE0) {
            char_length = 3; // 1110xxxx
        } else if ((byte & 0xF8) == 0xF0) {
            char_length = 4; // 11110xxx
        } else {
            // Either a stray continuation byte (10xxxxxx) or a lead byte
            // announcing five or more bytes; neither can start a character.
            return false;
        }
        for (int i = 1; i < char_length; i++) {
            if (fread(&byte, 1, 1, f) != 1 || (byte & 0xC0) != 0x80) {
                return false;
            }
        }
    }
    return true;
}
/*
 * Report whether every byte in the stream is an ISO-8859 text byte.
 *
 * The stream is rewound first and then read byte by byte until end of file.
 *
 * f: an open, readable, seekable stream.
 *
 * Accepted bytes are 0x07..0x0D, 0x1B, 0x20..0x7E and 0xA0..0xFF. Returns
 * false at the first byte outside that set and true otherwise, including for
 * an empty stream. A pure-ASCII stream is therefore accepted as well.
 */
bool is_iso8859(FILE* f) {
    rewind(f);
    unsigned char byte;
    while (fread(&byte, 1, 1, f) == 1) {
        bool iso_byte = (byte >= 0x07 && byte <= 0x0D)  // BEL .. CR
                     || byte == 0x1B                    // ESC
                     || (byte >= 0x20 && byte <= 0x7E)  // printable ASCII, '~' included
                     || byte >= 0xA0;                   // upper half, 0xFF included
        if (!iso_byte) {
            return false;
        }
    }
    return true;
}
/*
 * Report whether every byte in the stream is an ASCII text byte.
 *
 * The stream is rewound first and then read byte by byte until end of file.
 *
 * f: an open, readable, seekable stream.
 *
 * Accepted bytes are 0x07..0x0D, 0x1B and 0x20..0x7E. Returns false at the
 * first byte outside that set and true otherwise, including for an empty
 * stream.
 */
bool is_ascii(FILE* f) {
    rewind(f);
    unsigned char byte;
    while (fread(&byte, 1, 1, f) == 1) {
        bool ascii_byte = (byte >= 0x07 && byte <= 0x0D)   // BEL .. CR
                       || byte == 0x1B                     // ESC
                       || (byte >= 0x20 && byte <= 0x7E);  // printable ASCII, '~' included
        if (!ascii_byte) {
            return false;
        }
    }
    return true;
}
/*
 * Write "<path>: cannot determine (<reason>)" plus a newline to stderr, where
 * <reason> is the strerror() text for errnum.
 *
 * path:   name to put in front of the message.
 * errnum: error number to describe.
 *
 * Returns whatever fprintf returns: the number of characters written, or a
 * negative value if the write failed.
 */
int print_error(char* path, int errnum) {
    const char *reason = strerror(errnum);
    int written = fprintf(stderr, "%s: cannot determine (%s)\n", path, reason);
    return written;
}
/*
 * Classify the file named by the single command-line argument and print
 * "<path>: <kind>" to stdout, where <kind> is one of "empty", "ASCII text",
 * "UTF-8 Unicode text", "ISO-8859 text" or "data".
 *
 * Returns EXIT_SUCCESS once a classification has been printed. Returns
 * EXIT_FAILURE when the argument count is wrong, or when the file cannot be
 * opened or read (in which case print_error reports the reason).
 */
int main(int argc, char* argv[]) {
    if (argc != 2) {
        printf("Wrong number of arguments!\n");
        return EXIT_FAILURE;
    }
    char *path = argv[1];

    // Binary mode: the classifiers inspect raw bytes, so no newline
    // translation may happen on platforms that distinguish text mode.
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        // Report against the path we tried to open; argv[2] is a null
        // pointer when argc == 2. Stop here, since f cannot be read or closed.
        print_error(path, errno);
        return EXIT_FAILURE;
    }

    int status = EXIT_SUCCESS;
    unsigned char first;
    size_t got = fread(&first, 1, 1, f);
    int read_errno = errno; // saved before any later call can overwrite it

    if (got == 0 && ferror(f)) {
        // A failed read (e.g. the path is a directory) is not an empty file.
        // NOTE(review): errno after a failed fread is set by POSIX, not by
        // ISO C -- confirm for non-POSIX targets.
        print_error(path, read_errno);
        status = EXIT_FAILURE;
    } else if (got == 0) {
        fprintf(stdout, "%s: empty\n", path);
    } else if (is_ascii(f)) {
        fprintf(stdout, "%s: ASCII text\n", path);
    } else if (is_utf8(f)) {
        // UTF-8 is tested before ISO-8859: a multi-byte UTF-8 sequence made
        // of bytes >= 0xA0 (such as C3 A9) also lies entirely inside the
        // range is_iso8859 accepts and would otherwise be misreported.
        fprintf(stdout, "%s: UTF-8 Unicode text\n", path);
    } else if (is_iso8859(f)) {
        fprintf(stdout, "%s: ISO-8859 text\n", path);
    } else {
        fprintf(stdout, "%s: data\n", path);
    }

    fclose(f);
    return status;
}