/*
* ----------------------------------------------------------------------------
* "THE BEER-WARE LICENSE" (Revision 42):
* <csjp@sqrt.ca> wrote this file. As long as you retain this notice you
* can do whatever you want with this stuff. If we meet some day, and you think
* this stuff is worth it, you can buy me a beer in return. Christian SJ Peron
* ----------------------------------------------------------------------------
*
*/
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <vis.h>
#include <ctype.h>
#include <limits.h>
#include <assert.h>
/* Incremented for every -s on the command line; non-zero requests sorting. */
static int sort;
/* Count of symbols that occurred at least once in the input. */
static int nsymbols;
/* Column widths used when printing the report. */
static int symbwidth;
static int probwidth;
static int ientwidth;
static int tentwidth;
/* Argument of the -t option; NULL when the option was not given. */
static char *tflag;
/* Version string printed by the -V option. */
#define ENTROPY_VERSION "2.0"
/* Length of a string literal, excluding its terminating NUL. */
#define HDRSIZE(x) (sizeof(x) - 1)
/* Column headings of the report. */
#define SYMHEADER "SYMBOL"
#define PROBHEADER "PROB."
#define IENTHEADER "I-ENTROPY"
#define TENTHEADER "T-ENTROPY"
/*
 * Number of distinct byte values.  This has to be 256, not 255: it sizes the
 * frequency table, which is indexed by byte values 0x00..0xFF, and it bounds
 * SYMBOL_FOREACH, which must visit 0xFF as well.
 */
#define NSYMBOLS 0x100U
/* Iterate x over every symbol value, 0 .. NSYMBOLS - 1. */
#define SYMBOL_FOREACH(x) for ((x) = 0; (x) < NSYMBOLS; (x)++)
/*
 * Per-symbol record: the occurrence count plus the strings that main()
 * formats (and strdup()s) for each column of the report.
 */
struct cinfo {
/* number of times the symbol occurred in the input */
int freq;
/* printable form of the symbol: 'c' when isgraph(), otherwise 0xNN */
char *symbol;
/* "freq/total" text shown in the PROB. column */
char *sfreq;
/* formatted information content, in bits, of one occurrence */
char *ientr;
/* formatted information content multiplied by freq */
char *tentr;
};
/* One record per symbol value; static storage, so every count starts at 0. */
static struct cinfo table[NSYMBOLS];
/*
 * Return the number of bits in a char.
 *
 * The value comes straight from CHAR_BIT.  It is deliberately not derived by
 * shifting a char under an endianness #ifdef: byte order has no effect on the
 * shift operators, so a wrongly chosen shift direction makes such a loop stop
 * after a single iteration and report 1 bit, and left-shifting a signed char
 * into the sign bit is not portable either.
 */
static inline unsigned
bitsperchar(void)
{
    return ((unsigned)CHAR_BIT);
}
static int
cinfo_cmp(struct cinfo *a, struct cinfo *b)
{
if (a->freq > b->freq)
return (-1);
else
return (1);
}
/*
 * Write the command-line synopsis to stderr and terminate the program with
 * exit status 1.  Never returns.
 */
static void
usage(void)
{
    static const char synopsis[] = "usage: entropy [-s] [-t string]\n";

    fputs(synopsis, stderr);
    exit(1);
}
/*
 * Duplicate a formatted table cell with strdup(3).  Terminates the program
 * when memory is exhausted, so a NULL pointer is never stored in the table
 * and later handed to printf("%s").
 */
static char *
cell_dup(const char *s)
{
    char *copy;

    copy = strdup(s);
    if (copy == NULL) {
        perror("entropy: strdup");
        exit(1);
    }
    return (copy);
}

/*
 * Count how often each byte value occurs in the -t string (or on stdin when
 * -t is absent), then print one row per occurring symbol with its
 * probability, its information content in bits and that value multiplied by
 * its count, followed by a two-line summary.
 *
 * Options: -s sorts the rows with cinfo_cmp, -t string analyses the given
 * string instead of stdin, -V prints the version and exits.
 * Returns 0 on success; usage errors, read errors and allocation failures
 * terminate with exit status 1.
 */
int
main(int argc, char *argv [])
{
    const char *p;
    char buf[64];
    double total_bits, ientr, tentr, ratio;
    FILE *fp;
    int c, set, ch, i;

    while ((ch = getopt(argc, argv, "st:V")) != -1)
        switch (ch) {
        case 's':
            sort++;
            break;
        case 't':
            tflag = optarg;
            break;
        case 'V':
            printf("entropy version %s\n",
                ENTROPY_VERSION);
            exit(0);
            /* NOTREACHED */
        default:
            usage();
        }

    fp = stdin;
    set = 0;
    if (tflag) {
        /*
         * Index through unsigned char: plain char may be signed, and a
         * negative value converted straight to unsigned would land far
         * outside the table.
         */
        for (p = tflag; *p != '\0'; p++) {
            set++;
            table[(unsigned char)*p].freq++;
        }
    } else {
        while ((c = getc(fp)) != EOF) {
            set++;
            table[(unsigned char)c].freq++;
        }
        /* getc() also returns EOF on failure; do not report partial data. */
        if (ferror(fp)) {
            perror("entropy: stdin");
            exit(1);
        }
    }

    symbwidth = HDRSIZE(SYMHEADER);
    probwidth = HDRSIZE(PROBHEADER);
    ientwidth = HDRSIZE(IENTHEADER);
    tentwidth = HDRSIZE(TENTHEADER);

    total_bits = 0.0;
    SYMBOL_FOREACH(i) {
        if (table[i].freq == 0)
            continue;
        nsymbols++;
        /*
         * Information content in bits: log2(1/p) with p = freq/set.
         * Taking the logarithm of the reciprocal keeps the value
         * non-negative, so an input made of a single symbol prints
         * 0.000000 rather than -0.000000.
         */
        ientr = log2((double)set / (double)table[i].freq);
        tentr = ientr * (double)table[i].freq;
        total_bits += tentr;

        if (isgraph(i))
            snprintf(buf, sizeof(buf), "'%c'", i);
        else
            snprintf(buf, sizeof(buf), "0x%02x", (unsigned)i);
        table[i].symbol = cell_dup(buf);

        /* Each column grows to fit its widest formatted cell. */
        ch = snprintf(buf, sizeof(buf), "%d/%d", table[i].freq, set);
        table[i].sfreq = cell_dup(buf);
        if (ch > probwidth)
            probwidth = ch;

        ch = snprintf(buf, sizeof(buf), "%f", ientr);
        table[i].ientr = cell_dup(buf);
        if (ch > ientwidth)
            ientwidth = ch;

        ch = snprintf(buf, sizeof(buf), "%f", tentr);
        table[i].tentr = cell_dup(buf);
        if (ch > tentwidth)
            tentwidth = ch;
    }

    /* cinfo_cmp is converted to the comparator type qsort(3) expects. */
    if (sort)
        qsort(table, NSYMBOLS, sizeof(table[0]),
            (int (*)(const void *, const void *))cinfo_cmp);

    printf("%-*.*s %-*.*s %-*.*s %-*.*s\n",
        symbwidth, symbwidth, SYMHEADER,
        probwidth, probwidth, PROBHEADER,
        ientwidth, ientwidth, IENTHEADER,
        tentwidth, tentwidth, TENTHEADER);
    SYMBOL_FOREACH(i) {
        if (table[i].freq == 0)
            continue;
        printf("%-*.*s %-*.*s %-*.*s %-*.*s\n",
            symbwidth, symbwidth, table[i].symbol,
            probwidth, probwidth, table[i].sfreq,
            ientwidth, ientwidth, table[i].ientr,
            tentwidth, tentwidth, table[i].tentr);
        free(table[i].symbol);
        free(table[i].sfreq);
        free(table[i].ientr);
        free(table[i].tentr);
    }

    /*
     * Empty input has nothing to compress: report 0 instead of dividing
     * zero by zero.  The denominator is formed in double so that
     * set * bits-per-char cannot wrap around for large inputs.
     */
    ratio = 0.0;
    if (set > 0)
        ratio = 100.0 * (1.0 - total_bits /
            ((double)set * (double)bitsperchar()));
    printf("MAX theorectical compression: %%%03.3f\n", ratio);
    printf("DISTINCT SYMBOLS: %d (%u contiguous bits per symbol)\n",
        nsymbols, bitsperchar());
    return (0);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */