/* MD5DEEP - files.c * * By Jesse Kornblum * * This is a work of the US Government. In accordance with 17 USC 105, * copyright protection is not available for any work of the US Government. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * */ // $Id: files.c,v 1.11 2007/09/23 01:54:22 jessekornblum Exp $ #include "main.h" /* --------------------------------------------------------------------- How to add more file types that we can read known hashes from: 1. Add a definition of TYPE_[fileType] to main.h. Ex: for type "cows", you would want to define TYPE_COWS. 2. If your filetype has a rigid header, you should define a HEADER variable to make comparisons easier. 3. Add a check for your file type to the hash_file_type() function. 4. Create a method to find a valid hash and filename in a line of your file. You are encouraged to use find_plain_hash and find_rigid_hash if possible! 5. Add this method to the find_hash_in_line() function. 6. Add a variable to the state in main.h to indicate if each filetype supports this hash. The variable can be used to denote a position if necessary. Look for h_[name] variables for examples. 7. Each hashing algorithm should set this variable in the setup_hashing_algorithm function. 
---------------------------------------------------------------------- */ typedef struct _ENCASE_HASH_HEADER { /* 000 */ char Signature[8]; /* 008 */ uint32_t Version; /* 00c */ uint32_t Padding; /* 010 */ uint32_t NumHashes; } ENCASE_HASH_HEADER; #define ENCASE_HEADER "HASH\x0d\x0a\xff\x00" #define ILOOK_HEADER \ "V1Hash,HashType,SetDescription,FileName,FilePath,FileSize" #define NSRL_15_HEADER \ "\"SHA-1\",\"FileName\",\"FileSize\",\"ProductCode\",\"OpSystemCode\",\"MD4\",\"MD5\",\"CRC32\",\"SpecialCode\"" #define NSRL_20_HEADER \ "\"SHA-1\",\"MD5\",\"CRC32\",\"FileName\",\"FileSize\",\"ProductCode\",\"OpSystemCode\",\"SpecialCode\"" #define HASHKEEPER_HEADER \ "\"file_id\",\"hashset_id\",\"file_name\",\"directory\",\"hash\",\"file_size\",\"date_modified\",\"time_modified\",\"time_zone\",\"comments\",\"date_accessed\",\"time_accessed\"" #define HASH_STRING_LENGTH (s->hash_length * 2) int valid_hash(state *s, char *buf) { size_t pos = 0; if (strlen(buf) < HASH_STRING_LENGTH) return FALSE; for (pos = 0 ; pos < HASH_STRING_LENGTH ; pos++) if (!isxdigit(buf[pos])) return FALSE; return TRUE; } // Remove the newlines, if any. 
Works on both DOS and *nix newlines void chop_line(char *s) { size_t pos = strlen(s); if (s[pos - 2] == '\r' && s[pos - 1] == '\n') s[pos - 2] = 0; else if (s[pos-1] == '\n') s[pos - 1] = 0; } int find_plain_hash(state *s, char *buf, char *known_fn) { size_t p = HASH_STRING_LENGTH; if (buf == NULL) return FALSE; if ((strlen(buf) < HASH_STRING_LENGTH) || (buf[HASH_STRING_LENGTH] != ' ')) return FALSE; if (known_fn != NULL) { strncpy(known_fn,buf,PATH_MAX); // Starting at the end of the hash, find the start of the filename while(p < strlen(known_fn) && isspace(known_fn[p])) ++p; shift_string(known_fn,0,p); chop_line(known_fn); } buf[HASH_STRING_LENGTH] = 0; /* We have to include a validity check here so that we don't mistake SHA-1 hashes for MD5 hashes, among other things */ return (valid_hash(s,buf)); } int find_md5deep_size_hash(state *s, char *buf, char *known_fn) { size_t pos; if (NULL == buf) return FALSE; // Extra 12 chars for size and space if (strlen(buf) < HASH_STRING_LENGTH + 12) return FALSE; // Check for size. Spaces are legal here (e.g. " 20") for (pos = 0 ; pos < 10 ; ++pos) if (!(isdigit(buf[pos]) || 0x20 == buf[pos])) return FALSE; if (buf[10] != 0x20 && buf[11] != 0x20) return FALSE; shift_string(buf,0,12);; return find_plain_hash(s,buf,known_fn); } int find_bsd_hash(state *s, char *buf, char *fn) { char *temp; size_t buf_len = strlen(buf); unsigned int pos = 0, hash_len = HASH_STRING_LENGTH, first_paren, second_paren; if (buf == NULL || buf_len < hash_len) return FALSE; while (pos < buf_len && buf[pos] != '(') ++pos; /* The hash always comes after the file name, so there has to be enough room for the filename and *then* the hash. */ if (pos + hash_len + 1 > buf_len) return FALSE; first_paren = pos; /* We only need to check back as far as the opening parenethsis, not the start of the string. If the closing paren comes before the opening paren (e.g. 
)( ) then the line is not valid */ pos = buf_len - hash_len; while (pos > first_paren && buf[pos] != ')') --pos; if (pos == first_paren) return FALSE; second_paren = pos; if (fn != NULL) { temp = strdup(buf); temp[second_paren] = 0; // The filename starts one character after the first paren shift_string(temp,0,first_paren+1); strncpy(fn,temp,PATH_MAX); free(temp); } /* We chop instead of setting buf[HASH_STRING_LENGTH] = 0 just in case there is extra data. We don't want to chop up longer (possibly invalid) data and take part of it as a valid hash! */ chop_line(buf); // The hash always begins four characters after the second paren shift_string(buf,0,second_paren+4); return (valid_hash(s,buf)); } /* This is a generic function to find the filename and hash from a rigid (i.e. comma separated value) file format. Values may be quoted, but the quotes are removed before values are returned. The location variables refer to how many commas preceed the entry. For example, to get the hash out of: filename,junk,stuff,hash,stuff you should call find_rigid_hash(buf,fn,0,3); */ int find_rigid_hash(state *s, char *buf, char *fn, unsigned int fn_location, unsigned int hash_location) { char *temp = strdup(buf); if (temp == NULL) return FALSE; if (find_comma_separated_string(temp,fn_location)) { free(temp); return FALSE; } strncpy(fn, temp, strlen(fn)); free(temp); if (find_comma_separated_string(buf,hash_location)) return FALSE; return valid_hash(s,buf); } #ifdef WORDS_BIGENDIAN uint32_t byte_reverse(uint32_t n) { uint32_t res = 0, count, bytes[5]; for (count = 0 ; count < 4 ; count++) { bytes[count] = (n & (0xff << (count * 8))) >> (count * 8); res |= bytes[count] << (24 - (count * 8)); } return res; } #endif /* iLook files have the MD5 hash as the first 32 characters. 
As a result, we can just treat these files like plain hash files */
int find_ilook_hash(state *s, char *buf, char *known_fn)
{
  // s->h_ilook is zero when the current algorithm has no iLook support
  if (!s->h_ilook)
    return FALSE;

  return (find_plain_hash(s,buf,known_fn));
}


/* Reads an EnCase header from the current position of f.

   Returns TRUE and stores the header's hash count in
   s->expected_hashes when the signature matches; f is then left
   positioned just past the header. In every other case f is rewound
   to the start and FALSE is returned. */
static int check_for_encase(state *s, FILE *f)
{
  // The header is small and fixed in size, so it lives on the stack.
  // There is then nothing to release on any of the return paths.
  ENCASE_HASH_HEADER h;

  if (sizeof(h) != fread(&h,1,sizeof(h),f))
  {
    // Too short to be an EnCase file. Rewind so the caller can still
    // read the first line of the file as text.
    rewind(f);
    return FALSE;
  }

  if (memcmp(h.Signature,ENCASE_HEADER,8))
  {
    rewind(f);
    return FALSE;
  }

#ifdef WORDS_BIGENDIAN
  h.NumHashes = byte_reverse(h.NumHashes);
#endif

  s->expected_hashes = h.NumHashes;
  return TRUE;
}


/* Works out which known-hash format the file f is written in and
   returns the matching TYPE_ value, or TYPE_UNKNOWN when nothing fits.
   The stream is rewound first. Formats the current algorithm does not
   support (their h_ flag in s is zero) are not tested for. */
int hash_file_type(state *s, FILE *f)
{
  char buf[MAX_STRING_LENGTH + 1];
  char probe[MAX_STRING_LENGTH + 1];

  rewind(f);

  /* The "rigid" file types all have their headers in the
     first line of the file. We check them first */

  if (s->h_encase)
  {
    if (check_for_encase(s,f))
      return TYPE_ENCASE;
  }

  if ((fgets(buf,MAX_STRING_LENGTH,f)) == NULL)
    return TYPE_UNKNOWN;

  if (strlen(buf) > HASH_STRING_LENGTH)
  {
    chop_line(buf);

    if (s->h_hashkeeper)
    {
      if (STRINGS_EQUAL(buf,HASHKEEPER_HEADER))
        return TYPE_HASHKEEPER;
    }

    if (s->h_nsrl15)
    {
      if (STRINGS_EQUAL(buf,NSRL_15_HEADER))
        return TYPE_NSRL_15;
    }

    if (s->h_nsrl20)
    {
      if (STRINGS_EQUAL(buf,NSRL_20_HEADER))
        return TYPE_NSRL_20;
    }

    if (s->h_ilook)
    {
      if (STRINGS_EQUAL(buf,ILOOK_HEADER))
        return TYPE_ILOOK;
    }
  }

  /* Plain files can have comments, so the first line(s) may not
     contain a valid hash. But we should process this file if we can
     find even *one* valid hash.

     The finders modify the buffer they are given, sometimes even when
     they end up rejecting the line, so each one gets a fresh copy.
     fgets() stores fewer than MAX_STRING_LENGTH characters, so the
     copy always fits. Only the verdict matters here, so no filename
     buffer is supplied. */
  do
  {
    strcpy(probe,buf);
    if (find_bsd_hash(s,probe,NULL))
      return TYPE_BSD;

    strcpy(probe,buf);
    if (find_md5deep_size_hash(s,probe,NULL))
      return TYPE_MD5DEEP_SIZE;

    strcpy(probe,buf);
    if (find_plain_hash(s,probe,NULL))
      return TYPE_PLAIN;
  } while ((fgets(buf,MAX_STRING_LENGTH,f)) != NULL);

  return TYPE_UNKNOWN;
}


/* Given an input string buf and the type of file it came from, finds
   the hash specified in the line if there is one and returns TRUE.
   If there is no valid hash in the line, returns FALSE.
   All functions called from here are required to check that the hash
   is valid before returning!

   buf is modified by the finder that gets called. fn is handed to that
   finder to receive the filename. For the rigid formats the filename
   column is fixed per format, and the hash column is taken from the
   matching h_ field of s. Any fileType not listed yields FALSE. */
int find_hash_in_line(state *s, char *buf, int fileType, char *fn)
{
  switch(fileType) {

  case TYPE_PLAIN:
    return find_plain_hash(s,buf,fn);

  case TYPE_BSD:
    return find_bsd_hash(s,buf,fn);

  case TYPE_HASHKEEPER:
    return (find_rigid_hash(s,buf,fn,2,s->h_hashkeeper));

  case TYPE_NSRL_15:
    return (find_rigid_hash(s,buf,fn,1,s->h_nsrl15));

  case TYPE_NSRL_20:
    return (find_rigid_hash(s,buf,fn,3,s->h_nsrl20));

  case TYPE_ILOOK:
    return (find_ilook_hash(s,buf,fn));

  case TYPE_MD5DEEP_SIZE:
    return (find_md5deep_size_hash(s,buf,fn));

  default:
    break;
  }

  return FALSE;
}