// // $Id: convertLog.cc,v 1.2 2002/11/30 01:38:45 dredd Exp $ // // $Source: /cvsroot/hammerhead/hammerhead/utils/convertLog.cc,v $ // $Revision: 1.2 $ // $Date: 2002/11/30 01:38:45 $ // $State: Exp $ // // Author: Jon Gifford // // Purpose: Convert a log file into a set of scenarios // #include #include #include #include #include #include #include #include #include "dictionary.h" #include "str.h" #ifndef SunOS #include "hrtime.h" #endif extern char * strptime (const char * buf, const char * format, struct tm * tm); class knownIp { public: String ipAddress; int ipCount; time_t ipTime; String ipRequest; long scenarioNum; }; void destroy(knownIp **) { } #define MAXTHINK 60 void printScenario(FILE *outF, const String& ipA, const String& ipR, long thinkT, int cnt, long scenarioNum, long nextScenarioNum) { #if 0 struct in_addr ipAddr; #ifdef SunOS ipAddr.S_un.S_addr = ipA; #else ipAddr.s_addr = ipA; #endif #endif fprintf(outF, "#%s #%d\nN%lx\nR%s\n", ipA.c_str(), cnt, scenarioNum, ipR.c_str()); if (thinkT >= 0 && thinkT < MAXTHINK) { fprintf(outF, "S%lx\nT%ld\n", nextScenarioNum, thinkT * 1000000); } fprintf(outF, "X%d\n.\n", (scenarioNum == -1) ? 0 : 1); } main(int argc, char *argv[]) { if (argc != 3) { cerr << "Usage: " << argv[0] << " logFileName scenarioFileName\n"; exit(1); } FILE *inFile; if (argv[1][0] == '-') { // stdin redirect inFile = stdin; } else { inFile = fopen(argv[1], "r"); if (inFile == NULL) { cerr << "Error: Unable to open log file: " << argv[1] << endl; exit(2); } } FILE *outFile; if (argv[2][0] == '-') { // stdout outFile = stdout; } else { outFile = fopen(argv[2], "w"); if (outFile == NULL) { cerr << "Error: unable to open scenario file: " << argv[2] << endl; exit(3); } } Dictionary knownIps; int lineNum = 0; int created = 0; int longestSeq = 0; hrtime_t fileStart = gethrtime(); char inBuf[BUFSIZ]; while (fgets(inBuf, BUFSIZ, inFile)) { lineNum++; if (lineNum % 5000 == 0) { hrtime_t currT = gethrtime(); double elapsed = (currT - fileStart)/1e9; double scenPerSec = lineNum/elapsed; fprintf(stderr, " %d (%d,%.0f,%.0f)", lineNum, knownIps.num_elements(), elapsed, scenPerSec); } inBuf[strlen(inBuf) - 1] = '\0'; if (strlen(inBuf) == 0) { // blank line continue; } char *ipStr = strtok(inBuf, " "); String source = ipStr; #if 0 // resolve it? unsigned long source = inet_addr(ipStr); if ((long )source == -1 || (long )source == 0) { cerr << "\nWarning: illegal ip address " << ipStr << " - Ignoring line " << lineNum << "\n"; continue; } #endif // now parse the rest of the line... char *tempToken = strtok(0, "["); char *time = strtok(0, "]"); if (time == 0) { cerr << "\nWarning: No Date/Time string found" << " - Ignoring line " << lineNum << "\n"; continue; } tempToken = strtok(0, "\""); char *request = strtok(0, "\""); if (request == 0) { cerr << "\nWarning: No request string found" << " - Ignoring line " << lineNum << "\n"; continue; } struct tm cTm; char *nextC = strptime(time, "%d/%b/%Y:%T", &cTm); if (nextC == 0) { cerr << "\nWarning: Illegal Date/Time string " << time << " - Ignoring line " << lineNum << "\n"; continue; } time_t cTime = mktime(&cTm); knownIp *ip; if (knownIps.exists(ipStr)) { ip = knownIps[ipStr]; long deltaT = cTime - ip->ipTime; printScenario(outFile, ip->ipAddress, ip->ipRequest, deltaT, ip->ipCount, ip->scenarioNum, lineNum); if (deltaT > MAXTHINK) { // assume new connection if think time too long... // initial request to / is not being logged, so fake it // whenever a new ip address is found. printScenario(outFile, ip->ipAddress, "GET / HTTP/1.0", 0, ip->ipCount, -1, lineNum); } ip->ipCount++; } else { ip = new knownIp(); knownIps[ipStr] = ip; ip->ipAddress = source; ip->ipCount = 1; // initial request to / is not being logged, so fake it // whenever a new ip address is found. printScenario(outFile, ip->ipAddress, "GET / HTTP/1.0", 0, 0, -1, lineNum); } ip->ipTime = cTime; ip->ipRequest = request; ip->scenarioNum = lineNum; created++; } knownIp **allElements = knownIps.element_array(); for (int i = 0; i < knownIps.num_elements(); i++) { knownIp *ip = allElements[i]; printScenario(outFile, ip->ipAddress, ip->ipRequest, -1, ip->ipCount, ip->scenarioNum, 0); if (ip->ipCount > longestSeq) { longestSeq = ip->ipCount; } } cerr << "\n\nSUMMARY: Scenarios created\t" << created << "\n\t Input lines discarded\t" << lineNum - created << "\n\t Unique IP addresses\t" << knownIps.size() << "\n\t Longest Sequence\t" << longestSeq << "\n\t Sequences:\n\t\tLength\tNumber\t%"; int arrayEnd = knownIps.num_elements(); for (int j = 1; j <= longestSeq; j++) { int cnt = 0; for (int i = 0; i < arrayEnd; ) { knownIp *ip = allElements[i]; if (ip->ipCount == j) { cnt++; allElements[i] = allElements[--arrayEnd]; delete ip; } else i++; } if (cnt > 0) { cerr << "\n\t\t" << j << "\t" << cnt << "\t" << (int )(((double)cnt*j/(double)created)*100000)/1000.0; } } delete allElements; cerr << "\n\n"; }