/******************************************************************* hhp *****
***  Author:       Rob Gubler -- tarsin@happy.digitaldune.net              ***
***  Date:         1998.05.03.r00                                          ***
***  Site:         http://www.hhp-programming.net/                         ***
***  Description:  Document Analysis                                       ***
***  Comments:     Really only useful for cryptanalysis                    ***
***                -s parameter is very CPU intensive; it's best to break  ***
***                up your document into smaller pieces if you want to use ***
***                the string analysis                                     ***
*****************************************************************************/

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum {
    CHAR_TABLE_SIZE   = 512, /* size of the per-character counter table      */
    HISTOGRAM_BUCKETS = 50,  /* ranking histogram resolution (2% per bucket) */
    LIST_GROWTH       = 12   /* slots added each time the global list grows  */
};

typedef struct char_info_s
{
    char *occurrence_str;
    char *rank_str;
    char *percentage_str;
} char_info;

int add_to_list(char_info *info);
unsigned long get_doc_len(FILE *fi);
void print_ch_occurrence(unsigned long *ascii_val, unsigned short sz);
void print_ch_used_most(unsigned long *ascii_val, unsigned short sz);
void print_ch_percentage_used(unsigned long *ascii_val, unsigned short sz,
                              unsigned long total_bytes);
void print_str_occurrence(char *buffer, unsigned long buff_size);
void print_stars(int ch, unsigned long occrrence, unsigned short star_num,
                 float percent);
int str_mid(unsigned long start, unsigned long count, char *buffer,
            unsigned long buffer_size, char *mid_str,
            unsigned long mid_str_size);
unsigned long src_buffer_for_str_occurrence(char *buffer,
                                            unsigned long buffer_size,
                                            char *src_str,
                                            unsigned long src_str_len);
static const char *escape_label(int ch);

/* Growable array of char_info records maintained by add_to_list(). */
char_info *list;
unsigned short list_size;        /* number of records stored in list    */
unsigned short list_array_size;  /* number of records list has room for */

/*
 * Entry point.
 *
 * Usage: <program> <file> [-s]
 *
 * Reads <file>, counts how often each byte value occurs and prints three
 * reports (occurrence, ranking, percentage).  With "-s" it additionally
 * prints every repeated string found in the document.  The first argument
 * that is not "-s" is taken as the file name, so "-s" may appear before or
 * after it.
 *
 * Returns 0 on success, 1 when no file was given, the file cannot be opened
 * or the document buffer cannot be allocated.
 */
int main(int argc, char *argv[])
{
    unsigned long ascii_val[CHAR_TABLE_SIZE] = {0};
    unsigned long total_bytes = 0;
    unsigned long doc_len;
    const char *path = NULL;
    char *file_buffer;
    int string_search = 0;
    int c;
    int i;
    FILE *fi;

    for (i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-s") == 0)
            string_search = 1;
        else if (path == NULL)
            path = argv[i];
    }

    if (path == NULL) {
        printf("\nNo file specified.\nUse the '-s' option if you want ");
        printf("string analysis as well.\n");
        return 1;
    }

    fi = fopen(path, "r");
    if (fi == NULL) {
        printf("Can't open \"%s\"", path);
        return 1;
    }

    doc_len = get_doc_len(fi);
    file_buffer = calloc(1, doc_len + 1);
    if (file_buffer == NULL) {
        printf("Can't allocate memory for \"%s\"", path);
        fclose(fi);
        return 1;
    }

    /*
     * fgetc() returns an int so that EOF can be told apart from every valid
     * byte value (including 0).  The loop is bounded by doc_len so the buffer
     * can never be overrun, even if the file grows while it is being read.
     */
    while (total_bytes < doc_len && (c = fgetc(fi)) != EOF) {
        ascii_val[(unsigned char)c] += 1;
        file_buffer[total_bytes] = (char)c;
        total_bytes += 1;
    }
    fclose(fi);

    printf("\n TEXT ANALYSIS \n_______________");

    printf("\n\nCHARACTER OCCURRENCE (character type, it's value, and how many time it occurred)");
    print_ch_occurrence(ascii_val, CHAR_TABLE_SIZE);

    printf("\n\nCHARACTER RANKING (comparison between the characters)");
    print_ch_used_most(ascii_val, CHAR_TABLE_SIZE);

    printf("\n\nCHARACTER PERCENTAGE (%% of the characters used bases on total document length)");
    print_ch_percentage_used(ascii_val, CHAR_TABLE_SIZE, total_bytes);

    if (string_search) {
        printf("\n\nSTRING OCCURRENCE (checks for file for reoccurring strings)");
        /* Use the number of bytes actually read, not the size from ftell(). */
        print_str_occurrence(file_buffer, total_bytes);
    }

    free(file_buffer);
    printf("\n");
    return 0;
}

/*
 * Returns the length of the stream in bytes as reported by ftell() at the
 * end of the stream, and rewinds the stream to its beginning.
 *
 * Returns 0 when fi is NULL or when the stream cannot be seeked/measured.
 */
unsigned long get_doc_len(FILE *fi)
{
    long end_pos = -1L;

    if (fi == NULL)
        return 0;

    if (fseek(fi, 0L, SEEK_END) == 0)
        end_pos = ftell(fi);
    rewind(fi);

    return end_pos > 0 ? (unsigned long)end_pos : 0;
}

/*
 * Prints every string of length >= 2 that occurs more than once in buffer.
 *
 * For each start position in the first half of the buffer and each length
 * up to half the buffer size, the number of (possibly overlapping)
 * occurrences is counted; candidates seen more than once are printed along
 * with their 1-based position, length and occurrence count.
 *
 * buffer    - document bytes
 * buff_size - number of valid bytes in buffer
 */
void print_str_occurrence(char *buffer, unsigned long buff_size)
{
    unsigned long half = buff_size / 2;
    unsigned long pos;
    unsigned long len;
    unsigned long hits;

    if (buffer == NULL)
        return;

    for (pos = 0; pos <= half; pos++) {
        for (len = 2; len <= half; len++) {
            /* pos <= half <= buff_size, so the subtraction cannot wrap. */
            if (len > buff_size - pos)
                break;

            hits = src_buffer_for_str_occurrence(buffer, buff_size,
                                                 buffer + pos, len);
            /*
             * Every occurrence of a longer string is also an occurrence of
             * its prefix, so once a prefix is unique no longer candidate
             * starting here can repeat either.
             */
            if (hits <= 1)
                break;

            /* fwrite() instead of %s: the candidate is not NUL-terminated. */
            putchar('\n');
            fwrite(buffer + pos, 1, len, stdout);
            printf("\npos: %lu len: %lu occurrence: %lu\n---",
                   pos + 1, len, hits);
        }
    }
}

/*
 * Appends a copy of *info to the global list, growing the list by
 * LIST_GROWTH zero-initialised slots when it is full.
 *
 * The caller keeps ownership of info; only the struct (i.e. the three
 * pointers) is copied.
 *
 * Returns 0 on success, -1 when info is NULL, the list cannot grow any
 * further, or memory allocation fails (the list is left unchanged).
 */
int add_to_list(char_info *info)
{
    if (info == NULL)
        return -1;

    if (list_size == list_array_size) {
        unsigned long new_cap = (unsigned long)list_array_size + LIST_GROWTH;
        char_info *grown;

        /* The capacity counter is an unsigned short; refuse to wrap it. */
        if (new_cap > USHRT_MAX)
            return -1;

        /* Assign through a temporary so the old list survives a failure. */
        grown = realloc(list, new_cap * sizeof *grown);
        if (grown == NULL)
            return -1;

        memset(grown + list_array_size, 0, LIST_GROWTH * sizeof *grown);
        list = grown;
        list_array_size = (unsigned short)new_cap;
    }

    list[list_size] = *info;
    list_size += 1;
    return 0;
}

/*
 * Returns the two-character escape spelling ("\n", "\t", ...) used in the
 * reports for the control characters that have one, or NULL for any other
 * character.
 */
static const char *escape_label(int ch)
{
    switch (ch) {
    case '\n': return "\\n";
    case '\t': return "\\t";
    case '\f': return "\\f";
    case '\r': return "\\r";
    case '\a': return "\\a";
    case '\b': return "\\b";
    default:   return NULL;
    }
}

/*
 * Prints one line per character that occurred at least once: the character
 * (or its escape spelling), its decimal and hexadecimal value, and its count.
 *
 * ascii_val - counter table indexed by character value
 * sz        - number of entries in ascii_val
 */
void print_ch_occurrence(unsigned long *ascii_val, unsigned short sz)
{
    unsigned short i;

    if (ascii_val == NULL)
        return;

    for (i = 0; i < sz; i++) {
        const char *label;

        if (ascii_val[i] == 0)
            continue;

        label = escape_label(i);
        if (label != NULL)
            printf("\nch = (ascii: '%s', dec: '%3d', hex: '%2x'). occurrence = %lu",
                   label, (int)i, (unsigned int)i, ascii_val[i]);
        else
            printf("\nch = (ascii: '%2c', dec: '%3d', hex: '%2x'). occurrence = %lu",
                   (int)i, (int)i, (unsigned int)i, ascii_val[i]);
    }
}

/*
 * Prints a star histogram ranking each used character against the most
 * frequent one.  The highest count is split into HISTOGRAM_BUCKETS equal
 * steps; a character gets as many stars as the index of the first step its
 * count does not exceed.
 *
 * ascii_val - counter table indexed by character value
 * sz        - number of entries in ascii_val
 */
void print_ch_used_most(unsigned long *ascii_val, unsigned short sz)
{
    unsigned long highest_count = 0;
    unsigned long high_parts[HISTOGRAM_BUCKETS];
    unsigned short i;
    unsigned short n;

    if (ascii_val == NULL)
        return;

    for (i = 0; i < sz; i++) {
        if (ascii_val[i] > highest_count)
            highest_count = ascii_val[i];
    }
    if (highest_count == 0)
        return; /* no character was used: nothing to rank */

    for (n = 0; n < HISTOGRAM_BUCKETS; n++) {
        high_parts[n] = (unsigned long)((double)highest_count * (n + 1)
                                        / HISTOGRAM_BUCKETS);
    }
    /*
     * Pin the last step to the maximum so rounding can never leave the most
     * frequent character without a bucket.
     */
    high_parts[HISTOGRAM_BUCKETS - 1] = highest_count;

    for (i = 0; i < sz; i++) {
        if (ascii_val[i] == 0)
            continue;

        for (n = 0; n < HISTOGRAM_BUCKETS; n++) {
            if (ascii_val[i] <= high_parts[n]) {
                print_stars(i, ascii_val[i], n, 0.0f);
                break;
            }
        }
    }
}

/*
 * Prints, for each used character, its share of the document in percent
 * followed by one star per whole percent.
 *
 * ascii_val   - counter table indexed by character value
 * sz          - number of entries in ascii_val
 * total_bytes - document length the percentages are based on
 */
void print_ch_percentage_used(unsigned long *ascii_val, unsigned short sz,
                              unsigned long total_bytes)
{
    unsigned short i;

    if (ascii_val == NULL || total_bytes == 0)
        return; /* avoids a division by zero on an empty document */

    for (i = 0; i < sz; i++) {
        float percent;
        unsigned short stars;

        if (ascii_val[i] == 0)
            continue;

        percent = (float)((double)ascii_val[i] / total_bytes) * 100;
        /* Clamp so an inconsistent total cannot overflow the star count. */
        stars = percent >= 100.0f ? 100 : (unsigned short)percent;
        print_stars(i, ascii_val[i], stars, percent);
    }
}

/*
 * Prints one histogram row: the character (or its escape spelling), its
 * count, the percentage when it is non-zero, and star_num stars.
 *
 * ch        - character value the row describes
 * occrrence - number of times the character occurred
 * star_num  - number of '*' to print
 * percent   - percentage to show; 0 means "do not print a percentage"
 */
void print_stars(int ch, unsigned long occrrence, unsigned short star_num,
                 float percent)
{
    const char *label = escape_label(ch);
    unsigned short n;

    printf("\n");

    if (label != NULL)
        printf("'%s' (%6lu) | ", label, occrrence);
    else
        printf("'%2c' (%6lu) | ", ch, occrrence);

    if (percent != 0)
        printf("%.2f%% ", percent);

    for (n = 0; n < star_num; n++)
        putchar('*');
}

/*
 * Copies count bytes of buffer, starting at index start, into mid_str and
 * NUL-terminates the result.
 *
 * start        - index of the first byte to copy
 * count        - number of bytes to copy
 * buffer       - source bytes
 * buffer_size  - number of valid bytes in buffer
 * mid_str      - destination
 * mid_str_size - capacity of mid_str in bytes, including the terminator
 *
 * Returns 1 on success.  Returns 0, leaving mid_str untouched, when a
 * pointer is NULL, the requested range does not lie inside buffer, or
 * mid_str cannot hold count bytes plus the terminator.
 */
int str_mid(unsigned long start, unsigned long count, char *buffer,
            unsigned long buffer_size, char *mid_str,
            unsigned long mid_str_size)
{
    if (buffer == NULL || mid_str == NULL)
        return 0;

    /* "count > buffer_size - start" avoids overflow in start + count. */
    if (start >= buffer_size || count > buffer_size - start
        || count >= mid_str_size)
        return 0;

    memcpy(mid_str, buffer + start, count);
    mid_str[count] = '\0';
    return 1;
}

/*
 * Counts how many times the src_str_len bytes at src_str occur in buffer.
 * Overlapping occurrences are counted individually.
 *
 * buffer      - bytes to search
 * buffer_size - number of valid bytes in buffer
 * src_str     - bytes to look for (need not be NUL-terminated)
 * src_str_len - number of bytes to compare
 *
 * Returns the number of occurrences; 0 when a pointer is NULL, src_str_len
 * is 0, or src_str_len exceeds buffer_size.
 */
unsigned long src_buffer_for_str_occurrence(char *buffer,
                                            unsigned long buffer_size,
                                            char *src_str,
                                            unsigned long src_str_len)
{
    unsigned long str_occurrence = 0;
    unsigned long last_start;
    unsigned long i;

    if (buffer == NULL || src_str == NULL)
        return 0;
    if (src_str_len == 0 || src_str_len > buffer_size)
        return 0;

    last_start = buffer_size - src_str_len;
    for (i = 0; i <= last_start; i++) {
        if (memcmp(buffer + i, src_str, src_str_len) == 0)
            str_occurrence += 1;
    }

    return str_occurrence;
}