141 lines
3.2 KiB
C
141 lines
3.2 KiB
C
#include <stdio.h> // fprintf, stdout, stderr.
|
|
#include <stdlib.h> // exit, EXIT_FAILURE, EXIT_SUCCESS.
|
|
#include <string.h> // strerror.
|
|
#include <errno.h> // errno.
|
|
#include <stdbool.h>
|
|
|
|
/*
 * Categories a file can be classified as.
 * The enumerators keep their implicit numbering (0..3), spelled out here.
 * NOTE(review): none of the functions visible in this file refer to this
 * enum; confirm whether it is used elsewhere before relying on it.
 */
enum FileType {
    ASCII = 0,
    ISO = 1,
    UTF = 2,
    data = 3
};
|
|
|
|
/**
 * Report whether the stream consists solely of structurally valid UTF-8
 * sequences.
 *
 * The stream is rewound first and then consumed one byte at a time.
 *
 * A lead byte is accepted when its number of leading 1 bits is 0 (single
 * byte), 2, 3 or 4; it must then be followed by exactly (count - 1)
 * continuation bytes of the form 10xxxxxx. A count of 1 (a continuation
 * byte where a lead byte is expected) or more than 4 is rejected. Earlier
 * versions accepted both, so stray 0x80..0xBF bytes and 0xF8..0xFF lead
 * bytes passed as UTF-8.
 *
 * Only the byte structure is checked: overlong encodings, surrogates and
 * code points above U+10FFFF are not detected.
 *
 * @param f  Open, seekable stream to inspect. Its position is changed.
 * @return   true if every byte belongs to a well-formed sequence (an empty
 *           stream also yields true); false on a NUL byte, an invalid lead
 *           byte, a bad continuation byte, or a sequence cut off by the end
 *           of the stream. A failed read is treated like end of stream.
 */
bool is_utf8(FILE* f) {
    rewind(f);

    unsigned char byte;
    while (fread(&byte, 1, 1, f) == 1) {
        /* A NUL byte never appears in text; treat the file as data. */
        if (byte == 0) {
            return false;
        }

        /* Count the leading 1 bits of the lead byte. */
        int char_length = 0;
        for (unsigned char mask = 0x80; mask != 0 && (byte & mask) != 0; mask >>= 1) {
            char_length++;
        }

        /* 10xxxxxx cannot start a character, and UTF-8 stops at 4 bytes. */
        if (char_length == 1 || char_length > 4) {
            return false;
        }

        /* Every remaining byte of the sequence must look like 10xxxxxx. */
        for (int i = 1; i < char_length; i++) {
            if (fread(&byte, 1, 1, f) != 1 || (byte >> 6) != 2) {
                return false;
            }
        }
    }

    return true;
}
|
|
|
|
/*
 * Return true when `byte` is allowed in ISO-8859 text as classified here:
 * the control characters 0x07..0x0D, ESC (0x1B), the printable ASCII range
 * 0x20..0x7E, or the high range 0xA0..0xFF.
 */
static bool is_iso8859_text_byte(unsigned char byte) {
    if (byte >= 0x07 && byte <= 0x0D) {
        return true;
    }
    if (byte == 0x1B) {
        return true;
    }
    if (byte >= 0x20 && byte <= 0x7E) {
        return true;
    }
    /* unsigned char cannot exceed 0xFF, so a lower bound is enough. */
    return byte >= 0xA0;
}

/**
 * Report whether every byte of the stream is an ISO-8859 text byte.
 *
 * The stream is rewound first and then read one byte at a time.
 *
 * The accepted set now includes '~' (0x7E) and 0xFF. The previous lookup
 * table stopped one short at the end of both ranges (0x7D and 0xFE), so
 * files containing those two bytes were rejected.
 *
 * @param f  Open, seekable stream to inspect. Its position is changed.
 * @return   true if no byte outside the accepted set was read (an empty
 *           stream also yields true); false at the first other byte.
 *           A failed read is treated like end of stream.
 */
bool is_iso8859(FILE* f) {
    rewind(f);

    unsigned char byte;
    while (fread(&byte, 1, 1, f) == 1) {
        if (!is_iso8859_text_byte(byte)) {
            return false;
        }
    }

    return true;
}
|
|
|
|
/*
 * Return true when `byte` is allowed in ASCII text as classified here:
 * the control characters 0x07..0x0D, ESC (0x1B), or the printable range
 * 0x20..0x7E.
 */
static bool is_ascii_text_byte(unsigned char byte) {
    if (byte >= 0x07 && byte <= 0x0D) {
        return true;
    }
    if (byte == 0x1B) {
        return true;
    }
    return byte >= 0x20 && byte <= 0x7E;
}

/**
 * Report whether every byte of the stream is an ASCII text byte.
 *
 * The stream is rewound first and then read one byte at a time.
 *
 * The accepted set now includes '~' (0x7E). The previous lookup table
 * stopped at 0x7D, so plain ASCII text containing a tilde was rejected.
 *
 * @param f  Open, seekable stream to inspect. Its position is changed.
 * @return   true if no byte outside the accepted set was read (an empty
 *           stream also yields true); false at the first other byte.
 *           A failed read is treated like end of stream.
 */
bool is_ascii(FILE* f) {
    rewind(f);

    unsigned char byte;
    while (fread(&byte, 1, 1, f) == 1) {
        if (!is_ascii_text_byte(byte)) {
            return false;
        }
    }

    return true;
}
|
|
|
|
/**
 * Write a "cannot determine" diagnostic for `path` to stderr.
 *
 * @param path    Name printed in front of the message.
 * @param errnum  Error number; its strerror() text is printed in parentheses.
 * @return        Whatever fprintf() returned for the diagnostic line.
 */
int print_error(char* path, int errnum) {
    const char* reason = strerror(errnum);
    int written = fprintf(stderr, "%s: cannot determine (%s)\n", path, reason);

    return written;
}
|
|
|
|
/**
 * Classify the file named by the single command-line argument and print
 * one line of the form "<path>: <kind>" to stdout, where <kind> is
 * "empty", "ASCII text", "ISO-8859 text", "Unicode text, UTF-8 text" or
 * "data". The checks run in that order and the first match wins.
 *
 * @param argc  Must be exactly 2 (program name plus one path).
 * @param argv  argv[1] is the path of the file to classify.
 * @return      EXIT_SUCCESS after printing a classification; EXIT_FAILURE
 *              on a wrong argument count, when the file cannot be opened,
 *              or when reading its first byte fails.
 */
int main(int argc, char* argv[]) {
    if (argc != 2) {
        /* Diagnostics belong on stderr so stdout only carries results. */
        fprintf(stderr, "Wrong number of arguments!\n");
        return EXIT_FAILURE;
    }

    char* path = argv[1];

    /* Binary mode: the classifiers look at raw bytes. */
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        /*
         * Report the path that was actually opened (argv[2] is a null
         * pointer when argc == 2) and stop: there is no stream to read.
         */
        print_error(path, errno);
        return EXIT_FAILURE;
    }

    int status = EXIT_SUCCESS;
    unsigned char byte;

    errno = 0;
    if (fread(&byte, 1, 1, f) == 0) {
        if (ferror(f)) {
            /* A failed read is not the same as an empty file. */
            int saved_errno = errno;
            print_error(path, saved_errno);
            status = EXIT_FAILURE;
        } else {
            fprintf(stdout, "%s: empty\n", path);
        }
    } else if (is_ascii(f)) {
        fprintf(stdout, "%s: ASCII text\n", path);
    } else if (is_iso8859(f)) {
        fprintf(stdout, "%s: ISO-8859 text\n", path);
    } else if (is_utf8(f)) {
        fprintf(stdout, "%s: Unicode text, UTF-8 text\n", path);
    } else {
        fprintf(stdout, "%s: data\n", path);
    }

    fclose(f);
    return status;
}
|