/* humanunzip - restores files compressed by humanzip Copyright (C) 2007 Matthew Strait This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ using namespace std; #include #include #include #include #include #include "humanzip.h" static int keeporig = 0; // keep the input files? static int tostdout = 0; // write to stdout? static int overwrite = 0; // overwrite existing files? static void handle_cmdline(vector & filenames, int argc, char ** argv) { int done = 0; const char * opts = "fckf:h?"; while(!done){ char c; switch(c = getopt(argc, argv, opts)) { case -1: done = 1; break; case 'f': overwrite = 1; break; case 'c': keeporig = 1; tostdout = 1; break; case 'k': keeporig = 1; break; case 'h': case '?': default: cerr << "humanunzip v" << HUMANVERSION << " © 2007 Matthew Strait\n" << "humanunzip comes with ABSOLUTELY NO WARRANTY. " << "This is free software\n" << "and you may redistribute it under the terms of the GPLv2.\n" << "\n" << "Syntax: humanunzip file.hz\n" << "\n" << "If no file is specified, humanunzip reads from stdin.\n" << "The uncompressed version is written to stdout. Options:\n" << "\n" << "-k\tKeep the compressed files instead of deleting them.\n" << "-c\tSend output to stdout and keep compresed files.\n" << "-f\tForce overwriting of existing output files.\n" << "-h\tPrint this help and exit.\n" << "\n"; exit(1); break; } } for(int i = optind; i < argc; i++) filenames.push_back(argv[i]); if(filenames.size() == 0) filenames.push_back("/dev/stdin"); } static int invalidkey(const string & line, int pos) { cerr << "Got invalid key line at byte " << pos << ": " << line << endl; return -1; } static string convertlinebreaks(const string & s) { string answer; for(unsigned int i = 0; i < s.size(); i++){ if(i < s.size()-1 && s.substr(i, 2) == "¶"){ answer += '\n'; i++; } else if(i < s.size()-2 && s.substr(i, 3) == "⁋"){ answer += '\r'; i += 2; } else answer += s[i]; } return answer; } static int parsekeyline(searchreplace & sr, const string & line) { unsigned int i = 0; while(1){ if(i >= line.size() || (line[i] > 0 && line[i] != '/') || sr.replacelower.size() > 4) return invalidkey(line, i); if(line[i] == '/'){ i++; break; } sr.replacelower += line[i]; i++; } while(1){ if(i >= line.size() || (line[i] > 0 && line[i] != ' ') || sr.replacelower.size()>4) invalidkey(line, i); if(line[i] == ' '){ i++; break; } sr.replaceupper += line[i]; i++; } if(i>=line.size() || line[i] != '-') return invalidkey(line, i); i++; if(i>=line.size() || line[i] != ' ') return invalidkey(line, i); i++; while(i < line.size()){ sr.search += line[i]; i++; } if(sr.replacelower.size() < 2 || sr.replaceupper.size() < 2 || sr.search.size() < 2) return invalidkey(line, -1); sr.search = convertlinebreaks(sr.search); return 1; } // Returns 0 on sucess, -1 on error. Sends back the last line that caused // an error, if it was expected to be a key line. static int getrt(string & errorline, vector & rt, ifstream & infile, const string & filename) { int returnvalue = 0; errorline = ""; string line; getline(infile, line); if(line != "ĦŨӍĄŅŽȈƤǷΈÐ"){ // Maybe the user removed the magic because he didn't like how it looked cerr << filename << " doesn't look like it is humanzipped.\n" "I'll try unzipping it anyway, keeping the original.\n"; keeporig = 1; returnvalue = -1; // try parsing the top line as a key line searchreplace sr; if(-1 == parsekeyline(sr, line)){ cerr << "I'll try unzipping it anyway, keeping the original.\n"; errorline = line; } else rt.push_back(sr); } // handle key lines while(getline(infile, line)){ if(line == "") break; // end of table searchreplace sr; if(-1 == parsekeyline(sr, line)){ cerr << "I'll try unzipping it anyway, keeping the original.\n"; errorline = line; returnvalue = -1; keeporig = 1; } else rt.push_back(sr); } return returnvalue; } static string findsearch(string replace, const vector & rt) { for(unsigned int i = 0; i < rt.size(); i++){ // Even though humanzip only writes lines with the first letter raised // let's do this symmetrically so that we can tolerate changes. if(replace == rt[i].replacelower) return lowerfirstletter(rt[i].search); if(replace == rt[i].replaceupper) return raisefirstletter(rt[i].search); } cerr << "Didn't find original string!\n"; return replace; } static string getutf8(ifstream & infile, char c) { string uchar; int width = 1; if ((c & 0xe0) == 0xc0) width = 2; else if((c & 0xf0) == 0xe0) width = 3; else if((c & 0xf8) == 0xf0) width = 4; if(width == 1){ cerr << "Got bad UTF-8 character!\n"; return ""; } for(int i = 0; i < width; i++){ infile.get(c); uchar += c; } return uchar; } // takes, e.g. "hello.hz" and returns "hello" static string mkfn(const string & filename) { if(filename.substr(filename.size()-3, 3) == ".hz") return filename.substr(0, filename.size()-3); else return filename + ".out"; } // Returns 1 on sucess, -1 on error static int doreplacement(const vector & rt, ifstream & infile, ofstream & outfile) { int returnvalue = 1; char c; while((c = infile.peek()) != EOF){ if(c >= 0){ infile.get(c); outfile << c; } else{ string replace = getutf8(infile, c); string search = findsearch(replace, rt); if(search == "") returnvalue = -1; outfile << findsearch(replace, rt); } } return returnvalue; } // Checks if we're ok with writing out the uncompressed file // returns 1 on sucess, 0 on failure static int checkoutfile(const string & filename) { if(tostdout) return 1; // stdout had better always be ok string outfilename = mkfn(filename); // if the file already exists and we're not willing to ovewrite it, fail struct stat buffer; if(!overwrite && (-1 != stat(outfilename.c_str(), &buffer))){ cerr << "Output file " << outfilename << " already exists, skipping.\n"; return 0; } // if the file does not exist, or it does and we're willing to overwrite // it, check if we can actually write to it ofstream outfile(outfilename.c_str()); if(!outfile.is_open()){ cerr << "Couldn't open " << outfilename << " for writing!\n"; return 0; } else{ outfile.close(); return 1; } } // Returns 1 on failure, 0 on sucess static int doit(const string & filename) { int returnvalue = 0; if(!checkoutfile(filename)) return 1; ifstream infile(filename.c_str()); if(!infile.is_open()){ cerr << "Couldn't open " << filename << ". Skipping...\n"; return 1; } string outfilename; if(filename == "/dev/stdin" || tostdout) outfilename = "/dev/stdout"; else outfilename = mkfn(filename); ofstream outfile(outfilename.c_str()); if(!outfile.is_open()){ cerr << "Couldn't open " << outfilename << " for writing\n"; return 1; } vector replacementtable; string errorline = ""; if(-1 == getrt(errorline, replacementtable, infile, filename)) returnvalue = 1; // Was expecting a key line, got this. Maybe the blank line was deleted? if(returnvalue == 1 && errorline != "") outfile << errorline << endl; if(-1 == doreplacement(replacementtable, infile, outfile)) returnvalue = 1; if(!keeporig) unlink(filename.c_str()); return returnvalue; } int main(int argc, char ** argv) { int nfailed = 0; vector filenames; handle_cmdline(filenames, argc, argv); for(unsigned int i = 0; i < filenames.size(); i++) nfailed += doit(filenames[i]); return nfailed; }