/* 入力のセットを管理するコード
 *
 * Copyright (C) 2006 HANAOKA Toshiyuki
 * Copyright (C) 2006-2007 TABATA Yusuke
 *
 * Special Thanks: Google Summer of Code Program 2006
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "input_set.h"

#define HASH_SIZE 1024

struct int_map_node {
  int key;
  int val;
  struct int_map_node *next;
};

struct int_map {
  /**/
  int nr;
  struct int_map_node *hash_head;
  /**/
  int array_size;
  struct int_map_node **array;
};

struct input_set {
  /**/
  struct input_line *lines;
  struct input_line *buckets[HASH_SIZE];
  /**/
  struct int_map *feature_freq;
};

static int
line_hash(const int *ar, int nr)
{
  int i;
  unsigned h = 0;
  for (i = 0; i < nr; i++) {
    h += ar[i];
  }
  return (h % HASH_SIZE);
}

static struct input_line *
find_same_line(struct input_set *is, int *features, int nr)
{
  struct input_line *il;
  int h = line_hash(features, nr);
  for (il = is->buckets[h]; il; il = il->next_in_hash) {
    int i;
    if (il->nr_features != nr) {
      continue;
    }
    for (i = 0; i < nr; i++) {
      if (il->features[i] != features[i]) {
	break;
      }
    }
    if (i >= nr) {
      return il;
    }
  }
  return NULL;
}

static struct input_line *
add_line(struct input_set *is, int *features, int nr)
{
  int i, h;
  struct input_line *il;
  il = malloc(sizeof(struct input_line));
  il->nr_features = nr;
  il->features = malloc(sizeof(int) * nr);
  for (i = 0; i < nr; i++) {
    il->features[i] = features[i];
  }
  il->weight = 0;
  il->negative_weight = 0;
  /* link */
  il->next_line = is->lines;
  is->lines = il;
  /**/
  h = line_hash(features, nr);
  il->next_in_hash = is->buckets[h];
  is->buckets[h] = il;
  return il;
}

static void
add_feature_count(struct int_map *im, int nr, int *features, int weight)
{
  int i;
  for (i = 0; i < nr; i++) {
    int f = features[i];
    int v = int_map_peek(im, f);
    int_map_set(im, f, v + weight);
  }
}

/* input_setに入力を一つ加える */
void
input_set_set_features(struct input_set *is, int *features,
		       int nr, int weight)
{
  struct input_line *il;
  int abs_weight = abs(weight);

  /**/
  il = find_same_line(is, features, nr);
  if (!il) {
    il = add_line(is, features, nr);
  }
  /**/
  if (weight > 0) {
    il->weight += weight;
  } else {
    il->negative_weight += abs_weight;
  }
  /**/
  add_feature_count(is->feature_freq, nr, features, abs_weight);
}

struct input_set *
input_set_create(void)
{
  int i;
  struct input_set *is;
  is = malloc(sizeof(struct input_set));
  is->lines = NULL;
  /**/
  for (i = 0; i < HASH_SIZE; i++) {
    is->buckets[i] = NULL;
  }
  /**/
  is->feature_freq = int_map_new();
  /**/
  return is;
}

struct input_line *
input_set_get_input_line(struct input_set *is)
{
  return is->lines;
}

struct input_set *
input_set_filter(struct input_set *is,
		 double pos, double neg)
{
  struct input_set *new_is = input_set_create();
  struct input_line *il;
  for (il = is->lines; il; il = il->next_line) {
    if (il->weight > pos ||
	il->negative_weight > neg) {
      input_set_set_features(new_is, il->features,
			     il->nr_features, il->weight);
      input_set_set_features(new_is, il->features,
			     il->nr_features, -il->negative_weight);
    }
  }
  return new_is;
}

void
input_set_output_feature_freq(FILE *fp, struct input_set *is)
{
  int i;
  struct int_map *im = is->feature_freq;
  fprintf(fp, "0 %d\n", im->array_size);
  int_map_flatten(im);
  for (i = 0; i < im->array_size; i++) {
    if (!im->array[i]) {
      fprintf(fp, "0 0\n");
    } else {
      fprintf(fp, "%d %d\n", im->array[i]->key,
	      im->array[i]->val);
    }
  }
}

struct int_map *
int_map_new(void)
{
  int i;
  struct int_map *im = malloc(sizeof(struct int_map));
  im->nr = 0;
  im->hash_head = malloc(sizeof(struct int_map_node) * HASH_SIZE);
  for (i = 0; i < HASH_SIZE; i++) {
    im->hash_head[i].next = NULL;
  }
  return im;
}

static int
node_index(int idx)
{
  return idx % HASH_SIZE;
}

static struct int_map_node *
find_int_map_node(struct int_map *im, int idx)
{
  struct int_map_node *node;
  int h = node_index(idx);
  for (node = im->hash_head[h].next; node; node = node->next) {
    if (node->key == idx) {
      return node;
    }
  }
  return NULL;
}

int
int_map_peek(struct int_map *im, int idx)
{
  struct int_map_node *node = find_int_map_node(im, idx);
  if (node) {
    return node->val;
  }
  return 0;
}

void
int_map_set(struct int_map *im, int idx, int val)
{
  struct int_map_node *node = find_int_map_node(im, idx);
  int h;
  if (node) {
    node->val = val;
    return ;
  }
  /**/
  node = malloc(sizeof(struct int_map_node));
  node->key = idx;
  node->val = val;
  h = node_index(idx);
  node->next = im->hash_head[h].next;
  im->hash_head[h].next = node;
  /**/
  im->nr ++;
}

void
int_map_flatten(struct int_map *im)
{
  int i;
  struct int_map_node *node;
  int max_n = 0;
  /* 配列を準備する */
  im->array_size = im->nr * 2;
  im->array = malloc(sizeof(struct int_map_node *) * 
		     im->array_size);
  for (i = 0; i < im->array_size; i++) {
    im->array[i] = NULL;
  }
  /* 要素を置いていく */
  for (i = 0; i < HASH_SIZE; i++) {
    for (node = im->hash_head[i].next; node; node = node->next) {
      int n = 0;
      while (1) {
	int h;
	h = node->key + n;
	h %= im->array_size;
	if (!im->array[h]) {
	  im->array[h] = node;
	  break;
	}
	/**/
	n++;
      }
      if (n > max_n) {
	max_n = n;
      }
    }
  }
  /**/
  printf(" max_collision=%d\n", max_n);
}


syntax highlighted by Code2HTML, v. 0.9.1