#include <stdio.h>   // fprintf, stdout, stderr.
#include <stdlib.h>  // exit, EXIT_FAILURE, EXIT_SUCCESS.
#include <string.h>  // strerror.
#include <errno.h>   // errno.
#include <stdbool.h> // bool, true, false.

// The kinds of non-empty content this program distinguishes.
enum FileType{ASCII, ISO, UTF, data};

// Returns true when `byte` belongs to the ASCII text set used by this
// program: the control characters 0x07-0x0D (BEL..CR), ESC (0x1B) and the
// printable range 0x20-0x7E (space through '~').
static bool is_ascii_text_byte(unsigned char byte) {
    return (byte >= 0x07 && byte <= 0x0D)
        || byte == 0x1B
        || (byte >= 0x20 && byte <= 0x7E);
}

// Returns the total length in bytes (1-4) of the UTF-8 sequence introduced by
// `lead`, or 0 when `lead` cannot start a sequence (a stray continuation byte
// 10xxxxxx, or 11111xxx).
static int utf8_sequence_length(unsigned char lead) {
    if ((lead & 0x80) == 0x00) {
        return 1;
    }
    if ((lead & 0xE0) == 0xC0) {
        return 2;
    }
    if ((lead & 0xF0) == 0xE0) {
        return 3;
    }
    if ((lead & 0xF8) == 0xF0) {
        return 4;
    }
    return 0;
}

// Checks whether the whole stream is structurally valid UTF-8 text.
//
// The stream is rewound first. Every character must start with a valid lead
// byte and be followed by the right number of 10xxxxxx continuation bytes; a
// NUL byte or a truncated sequence makes the function return false.
// Only the bit patterns are validated: overlong encodings and surrogate code
// points are not rejected. A read error is treated like end of input.
bool is_utf8(FILE* f) {
    rewind(f);

    int c;
    while ((c = getc(f)) != EOF) {
        if (c == 0) {
            return false;
        }

        int length = utf8_sequence_length((unsigned char) c);
        if (length == 0) {
            return false;
        }

        for (int i = 1 ; i < length ; i++) {
            int next = getc(f);
            if (next == EOF || (next & 0xC0) != 0x80) {
                return false;
            }
        }
    }
    return true;
}

// Checks whether every byte of the stream is ISO-8859 text: an ASCII text
// byte (see is_ascii_text_byte) or a byte in the range 0xA0-0xFF.
//
// The stream is rewound first. A read error is treated like end of input.
bool is_iso8859(FILE* f) {
    rewind(f);

    int c;
    while ((c = getc(f)) != EOF) {
        unsigned char byte = (unsigned char) c;
        if (!is_ascii_text_byte(byte) && byte < 0xA0) {
            return false;
        }
    }
    return true;
}

// Checks whether every byte of the stream is an ASCII text byte
// (0x07-0x0D, 0x1B or 0x20-0x7E).
//
// The stream is rewound first. A read error is treated like end of input.
bool is_ascii(FILE* f) {
    rewind(f);

    int c;
    while ((c = getc(f)) != EOF) {
        if (!is_ascii_text_byte((unsigned char) c)) {
            return false;
        }
    }
    return true;
}

// Writes "<path>: cannot determine (<reason>)" to stderr, where <reason> is
// the strerror() text for `errnum`. Returns the result of fprintf.
int print_error(const char* path, int errnum) {
    return fprintf(stderr, "%s: cannot determine (%s)\n", path, strerror(errnum));
}

// Classifies a non-empty stream. UTF-8 is tested before ISO-8859 because the
// ISO-8859 check accepts most bytes >= 0xA0 and would otherwise claim typical
// multi-byte UTF-8 text (e.g. 0xC3 0xA9).
static enum FileType classify(FILE* f) {
    if (is_ascii(f)) {
        return ASCII;
    }
    if (is_utf8(f)) {
        return UTF;
    }
    if (is_iso8859(f)) {
        return ISO;
    }
    return data;
}

// Usage: <program> <path>
//
// Prints "<path>: <kind>" to stdout, where <kind> is one of "empty",
// "ASCII text", "ISO-8859 text", "UTF-8 Unicode text" or "data".
// Returns EXIT_FAILURE on a wrong argument count or when the file cannot be
// opened or read; EXIT_SUCCESS otherwise.
int main(int argc, char* argv[]) {
    if (argc != 2) {
        printf("Wrong number of arguments!\n");
        return EXIT_FAILURE;
    }

    // Binary mode: the classifiers inspect raw bytes, so no newline
    // translation may happen on platforms that distinguish text mode.
    FILE* f = fopen(argv[1], "rb");
    if (f == NULL) {
        print_error(argv[1], errno);
        return EXIT_FAILURE;
    }

    unsigned char first;
    if (fread(&first, 1, 1, f) == 0) {
        // Zero items read means either a truly empty file or a read error
        // (for instance when the path names a directory).
        if (ferror(f)) {
            int saved_errno = errno; // fclose may overwrite errno.
            fclose(f);
            print_error(argv[1], saved_errno);
            return EXIT_FAILURE;
        }
        fprintf(stdout, "%s: empty\n", argv[1]);
        fclose(f);
        return EXIT_SUCCESS;
    }

    enum FileType type = classify(f);
    if (ferror(f)) {
        // The classifiers treat a read error as end of input, so their
        // verdict cannot be trusted when the error indicator is set.
        int saved_errno = errno;
        fclose(f);
        print_error(argv[1], saved_errno);
        return EXIT_FAILURE;
    }

    switch (type) {
    case ASCII:
        fprintf(stdout, "%s: ASCII text\n", argv[1]);
        break;
    case ISO:
        fprintf(stdout, "%s: ISO-8859 text\n", argv[1]);
        break;
    case UTF:
        fprintf(stdout, "%s: UTF-8 Unicode text\n", argv[1]);
        break;
    default:
        fprintf(stdout, "%s: data\n", argv[1]);
        break;
    }

    fclose(f);
    return EXIT_SUCCESS;
}