//
// $Id: convertLog.cc,v 1.2 2002/11/30 01:38:45 dredd Exp $
//
// $Source: /cvsroot/hammerhead/hammerhead/utils/convertLog.cc,v $
// $Revision: 1.2 $
// $Date: 2002/11/30 01:38:45 $
// $State: Exp $
//
// Author: Jon Gifford
//
// Purpose: Convert a log file into a set of scenarios
//
#include <time.h>
#include <stdio.h>
#include <iostream.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include "dictionary.h"
#include "str.h"
#ifndef SunOS
#include "hrtime.h"
#endif
// Hand-written prototype for strptime(), which main() uses to parse the
// bracketed date/time field of each log line.
// NOTE(review): presumably declared here because <time.h> does not expose
// strptime() by default on some of the target platforms - confirm.
extern char * strptime (const char * buf, const char * format, struct tm * tm);
// Per-client record kept in the dictionary that main() builds, keyed by the
// first space-delimited token of each log line. All fields are filled in by
// main(); nothing is initialised here.
struct knownIp
{
    // Client address text exactly as it appeared in the log line.
    String ipAddress;

    // Number of log lines seen so far for this address (starts at 1).
    int ipCount;

    // Timestamp (mktime() result) of the most recent line for this address.
    time_t ipTime;

    // Quoted request string of the most recent line for this address.
    String ipRequest;

    // Input line number of the most recent line; used as the scenario id.
    long scenarioNum;
};
// Intentionally empty: does nothing with the pointer it is handed.
// NOTE(review): presumably the clean-up hook that Dictionary<knownIp *>
// looks up for its elements; main() deletes the knownIp objects itself,
// so nothing must be freed here - confirm against dictionary.h.
void
destroy(knownIp ** /* unused */)
{
}
// Think-time cutoff. printScenario() only emits the link to the next
// scenario when the think time is >= 0 and below this value, and main()
// compares the gap between two consecutive requests from the same address
// against it to decide when to start a new connection. The gap is a
// difference of two time_t values (presumably seconds).
#define MAXTHINK 60
// Write one scenario record to outF.
//
//   outF            - destination stream
//   ipA             - client address text, echoed on the leading '#' line
//   ipR             - request text, written on the 'R' line
//   thinkT          - think time; the 'S'/'T' lines are written only when
//                     0 <= thinkT < MAXTHINK
//   cnt             - request count, echoed on the leading '#' line
//   scenarioNum     - id written (in hex) on the 'N' line; the value -1
//                     makes the 'X' line carry 0 instead of 1
//   nextScenarioNum - id written (in hex) on the 'S' line
//
// The record is always terminated by an 'X' line followed by a line
// containing a single '.'.
void
printScenario(FILE *outF, const String& ipA, const String& ipR, long thinkT, int cnt,
              long scenarioNum, long nextScenarioNum)
{
    // Record header: comment line, scenario id, request.
    fprintf(outF, "#%s #%d\nN%lx\nR%s\n",
            ipA.c_str(), cnt, scenarioNum, ipR.c_str());

    // The follow-on link is skipped for negative or too-long think times.
    const bool outOfRange = (thinkT < 0 || thinkT >= MAXTHINK);
    if (!outOfRange)
    {
        // The think time is written scaled by one million.
        const long scaledThink = thinkT * 1000000;
        fprintf(outF, "S%lx\nT%ld\n",
                nextScenarioNum, scaledThink);
    }

    // 'X' carries 0 for the special id -1 and 1 for every other id.
    int xValue = 1;
    if (scenarioNum == -1)
    {
        xValue = 0;
    }
    fprintf(outF, "X%d\n.\n", xValue);
}
main(int argc, char *argv[])
{
if (argc != 3)
{
cerr << "Usage: " << argv[0] << " logFileName scenarioFileName\n";
exit(1);
}
FILE *inFile;
if (argv[1][0] == '-')
{
// stdin redirect
inFile = stdin;
}
else
{
inFile = fopen(argv[1], "r");
if (inFile == NULL)
{
cerr << "Error: Unable to open log file: " << argv[1] << endl;
exit(2);
}
}
FILE *outFile;
if (argv[2][0] == '-')
{
// stdout
outFile = stdout;
}
else
{
outFile = fopen(argv[2], "w");
if (outFile == NULL)
{
cerr << "Error: unable to open scenario file: " << argv[2] << endl;
exit(3);
}
}
Dictionary<knownIp *> knownIps;
int lineNum = 0;
int created = 0;
int longestSeq = 0;
hrtime_t fileStart = gethrtime();
char inBuf[BUFSIZ];
while (fgets(inBuf, BUFSIZ, inFile))
{
lineNum++;
if (lineNum % 5000 == 0)
{
hrtime_t currT = gethrtime();
double elapsed = (currT - fileStart)/1e9;
double scenPerSec = lineNum/elapsed;
fprintf(stderr, " %d (%d,%.0f,%.0f)",
lineNum, knownIps.num_elements(),
elapsed, scenPerSec);
}
inBuf[strlen(inBuf) - 1] = '\0';
if (strlen(inBuf) == 0)
{
// blank line
continue;
}
char *ipStr = strtok(inBuf, " ");
String source = ipStr;
#if 0
// resolve it?
unsigned long source = inet_addr(ipStr);
if ((long )source == -1 || (long )source == 0)
{
cerr << "\nWarning: illegal ip address " << ipStr
<< " - Ignoring line " << lineNum << "\n";
continue;
}
#endif
// now parse the rest of the line...
char *tempToken = strtok(0, "[");
char *time = strtok(0, "]");
if (time == 0)
{
cerr << "\nWarning: No Date/Time string found"
<< " - Ignoring line " << lineNum << "\n";
continue;
}
tempToken = strtok(0, "\"");
char *request = strtok(0, "\"");
if (request == 0)
{
cerr << "\nWarning: No request string found"
<< " - Ignoring line " << lineNum << "\n";
continue;
}
struct tm cTm;
char *nextC = strptime(time, "%d/%b/%Y:%T", &cTm);
if (nextC == 0)
{
cerr << "\nWarning: Illegal Date/Time string " << time
<< " - Ignoring line " << lineNum << "\n";
continue;
}
time_t cTime = mktime(&cTm);
knownIp *ip;
if (knownIps.exists(ipStr))
{
ip = knownIps[ipStr];
long deltaT = cTime - ip->ipTime;
printScenario(outFile, ip->ipAddress, ip->ipRequest,
deltaT, ip->ipCount, ip->scenarioNum, lineNum);
if (deltaT > MAXTHINK)
{
// assume new connection if think time too long...
// initial request to / is not being logged, so fake it
// whenever a new ip address is found.
printScenario(outFile, ip->ipAddress, "GET / HTTP/1.0",
0, ip->ipCount, -1, lineNum);
}
ip->ipCount++;
}
else
{
ip = new knownIp();
knownIps[ipStr] = ip;
ip->ipAddress = source;
ip->ipCount = 1;
// initial request to / is not being logged, so fake it
// whenever a new ip address is found.
printScenario(outFile, ip->ipAddress, "GET / HTTP/1.0",
0, 0, -1, lineNum);
}
ip->ipTime = cTime;
ip->ipRequest = request;
ip->scenarioNum = lineNum;
created++;
}
knownIp **allElements = knownIps.element_array();
for (int i = 0; i < knownIps.num_elements(); i++)
{
knownIp *ip = allElements[i];
printScenario(outFile, ip->ipAddress, ip->ipRequest, -1,
ip->ipCount, ip->scenarioNum, 0);
if (ip->ipCount > longestSeq)
{
longestSeq = ip->ipCount;
}
}
cerr << "\n\nSUMMARY: Scenarios created\t" << created
<< "\n\t Input lines discarded\t" << lineNum - created
<< "\n\t Unique IP addresses\t" << knownIps.size()
<< "\n\t Longest Sequence\t" << longestSeq
<< "\n\t Sequences:\n\t\tLength\tNumber\t%";
int arrayEnd = knownIps.num_elements();
for (int j = 1; j <= longestSeq; j++)
{
int cnt = 0;
for (int i = 0; i < arrayEnd; )
{
knownIp *ip = allElements[i];
if (ip->ipCount == j)
{
cnt++;
allElements[i] = allElements[--arrayEnd];
delete ip;
}
else
i++;
}
if (cnt > 0)
{
cerr << "\n\t\t" << j << "\t" << cnt << "\t"
<< (int )(((double)cnt*j/(double)created)*100000)/1000.0;
}
}
delete allElements;
cerr << "\n\n";
}
// syntax highlighted by Code2HTML, v. 0.9.1