/* -*- mode: C -*-  */
/* 
   IGraph library.
   Copyright (C) 2006  Gabor Csardi <csardi@rmki.kfki.hu>
   MTA RMKI, Konkoly-Thege Miklos st. 29-33, Budapest 1121, Hungary
   
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
   02110-1301 USA

*/

#include "igraph.h"
#include "memory.h"
#include "random.h"

#include <string.h>

/**
 * \function igraph_motifs_randesu
 * \brief Count the number of motifs in a graph
 * 
 * </para><para>
 * Motifs are small subgraphs of a given structure in a graph. It is
 * argued that the motif profile (ie. the number of different motifs
 * in the graph) is characteristic for different types of networks and
 * network function is related to the motifs in the graph. 
 * 
 * </para><para>
 * This function is able to find the different motifs of size three
 * and four (ie. the number of different subgraphs with three and four
 * vertices) in the network. (This limitation is the result of the
 * lack of code to decide graph isomorphism for larger graphs.)
 * 
 * </para><para>
 * In a big network the total number of motifs can be very large, so
 * it takes a lot of time to find all of them, a sampling method can
 * be used. This function is capable of doing sampling via the
 * \c cut_prob argument. This argument gives the probability that
 * a branch of the motif search tree will not be explored. See
 * S. Wernicke and F. Rasche: FANMOD: a tool for fast network motif
 * detection, Bioinformatics 22(9), 1152--1153, 2006 for details.
 * 
 * </para><para>
 * Set the \c cut_prob argument to a zero vector for finding all
 * motifs. 
 * 
 * </para><para> 
 * Directed motifs will be counted in directed graphs and undirected
 * motifs in undirected graphs.
 *
 * \param graph The graph to find the motifs in.
 * \param hist The result of the computation, it gives the number of
 *        motifs found for each isomorphism class. See
 *        \ref igraph_isoclass() for help about isomorphism classes.
 * \param size The size of the motifs to search for. Only three and
 *        four are implemented currently. The limitation is not in the
 *        motif finding code, but the graph isomorphism code.
 * \param cut_prob Vector of probabilities for cutting the search tree
 *        at a given level. The first element is the first level, etc.
 *        Supply all zeros here (of length \c size) to find all motifs 
 *        in a graph.
 * \return Error code.
 * \sa \ref igraph_motifs_randesu_estimate() for estimating the number
 * of motifs in a graph, this can help to set the \c cut_prob
 * parameter; \ref igraph_motifs_randesu_no() to calculate the total
 * number of motifs of a given size in a graph.
 * 
 * Time complexity: TODO.
 */

int igraph_motifs_randesu(const igraph_t *graph, igraph_vector_t *hist, 
			  int size, const igraph_vector_t *cut_prob) {

  long int no_of_nodes=igraph_vcount(graph);
  igraph_i_adjlist_t allneis, alloutneis;
  igraph_vector_t *neis;
  long int father;
  long int i, j, s;
  long int motifs=0;

  igraph_vector_t vids;		/* this is G */
  igraph_vector_t adjverts;	/* this is V_E */
  igraph_stack_t stack;		/* this is S */
  long int *added;
  char *subg;
  
  long int histlen;
  unsigned int *arr_idx, *arr_code;
  int code=0;
  unsigned char mul, idx;

  if (size != 3 && size != 4) {
    IGRAPH_ERROR("Only 3 and 4 vertex motifs are implemented",
		 IGRAPH_EINVAL);
  }
  if (size==3) {
    mul=3;
    if (igraph_is_directed(graph)) {
      histlen=16;
      arr_idx=igraph_i_isoclass_3_idx;
      arr_code=igraph_i_isoclass2_3;
    } else {
      histlen=4;
      arr_idx=igraph_i_isoclass_3u_idx;
      arr_code=igraph_i_isoclass2_3u;
    }
  } else {
    mul=4;
    if (igraph_is_directed(graph)) {
      histlen=218;
      arr_idx=igraph_i_isoclass_4_idx;
      arr_code=igraph_i_isoclass2_4;
    } else {
      histlen=11;
      arr_idx=igraph_i_isoclass_4u_idx;
      arr_code=igraph_i_isoclass2_4u;
    }
  }

  IGRAPH_CHECK(igraph_vector_resize(hist, histlen));
  igraph_vector_null(hist);
  
  added=Calloc(no_of_nodes, long int);
  if (added==0) {
    IGRAPH_ERROR("Cannot find motifs", IGRAPH_ENOMEM);
  }
  IGRAPH_FINALLY(igraph_free, added);

  subg=Calloc(no_of_nodes, char);
  if (subg==0) {
    IGRAPH_ERROR("Cannot find motifs", IGRAPH_ENOMEM);
  }
  IGRAPH_FINALLY(igraph_free, subg);

  IGRAPH_CHECK(igraph_i_adjlist_init(graph, &allneis, IGRAPH_ALL));
  IGRAPH_FINALLY(igraph_i_adjlist_destroy, &allneis);  
  IGRAPH_CHECK(igraph_i_adjlist_init(graph, &alloutneis, IGRAPH_OUT));
  IGRAPH_FINALLY(igraph_i_adjlist_destroy, &alloutneis);  

  IGRAPH_VECTOR_INIT_FINALLY(&vids, 0);
  IGRAPH_VECTOR_INIT_FINALLY(&adjverts, 0);
  IGRAPH_CHECK(igraph_stack_init(&stack, 0));
  IGRAPH_FINALLY(igraph_stack_destroy, &stack);

  for (father=0; father<no_of_nodes; father++) {
    long int level;

    IGRAPH_ALLOW_INTERRUPTION();
    
    /* init G */
    igraph_vector_clear(&vids); level=0;
    IGRAPH_CHECK(igraph_vector_push_back(&vids, father));
    subg[father]=1; added[father] += 1; level += 1;
    
    /* init V_E */
    igraph_vector_clear(&adjverts);
    neis=igraph_i_adjlist_get(&allneis, father);
    s=igraph_vector_size(neis);
    for (i=0; i<s; i++) {
      long int nei=VECTOR(*neis)[i];
      if (!added[nei] && nei > father) {
	IGRAPH_CHECK(igraph_vector_push_back(&adjverts, nei));
	IGRAPH_CHECK(igraph_vector_push_back(&adjverts, father));
      }
      added[nei] += 1;
    }
    
    /* init S */
    igraph_stack_clear(&stack);

    while (level != 1 || !igraph_vector_empty(&adjverts)) {
      igraph_real_t cp=VECTOR(*cut_prob)[level];
      
      if (level==size-1) {
	s=igraph_vector_size(&adjverts)/2;
	for (i=0; i<s; i++) {
	  long int k, s2;
	  long int last;

	  if (cp!=0 && RNG_UNIF01() < cp) { continue; }
	  motifs+=1;
	  
	  last=VECTOR(adjverts)[2*i];
	  IGRAPH_CHECK(igraph_vector_push_back(&vids, last));
	  subg[last]=size;

	  code=0; idx=0;
	  for (k=0; k<size; k++) {
	    long int from=VECTOR(vids)[k];
 	    neis=igraph_i_adjlist_get(&alloutneis, from);
	    s2=igraph_vector_size(neis);
	    for (j=0; j<s2; j++) {
	      long int nei=VECTOR(*neis)[j];
	      if (subg[nei] && k != subg[nei]-1) {
		idx=mul*k+(subg[nei]-1);
		code |= arr_idx[idx];
	      }
	    }
	  }

	  igraph_vector_pop_back(&vids);
	  subg[last]=0;
	  VECTOR(*hist)[arr_code[code]] += 1;
	}
      }

      /* can we step down? */
      if (level < size-1 && 
	  !igraph_vector_empty(&adjverts) && 
	  (cp==0 || RNG_UNIF01() > cp)) {
	/* yes, step down */
	long int neifather=igraph_vector_pop_back(&adjverts);
	long int nei=igraph_vector_pop_back(&adjverts);
	IGRAPH_CHECK(igraph_vector_push_back(&vids, nei));
	subg[nei] = level+1; added[nei] += 1; level += 1;

	IGRAPH_CHECK(igraph_stack_push(&stack, neifather));
	IGRAPH_CHECK(igraph_stack_push(&stack, nei));
	IGRAPH_CHECK(igraph_stack_push(&stack, level));
	
	neis=igraph_i_adjlist_get(&allneis, nei);
	s=igraph_vector_size(neis);
	for (i=0; i<s; i++) {
	  long int nei2=VECTOR(*neis)[i];
	  if (!added[nei2] && nei2 > father) {
	    IGRAPH_CHECK(igraph_vector_push_back(&adjverts, nei2));
	    IGRAPH_CHECK(igraph_vector_push_back(&adjverts, nei));
	  }
	  added[nei2] += 1;
	}
      } else {
	/* no, step back */
	long int nei, neifather;
	while (!igraph_stack_empty(&stack) &&
	       level==igraph_stack_top(&stack)-1) {
	  igraph_stack_pop(&stack);
	  nei=igraph_stack_pop(&stack);
	  neifather=igraph_stack_pop(&stack);
	  igraph_vector_push_back(&adjverts, nei);
	  igraph_vector_push_back(&adjverts, neifather);
	}

	nei=igraph_vector_pop_back(&vids);
	subg[nei]=0; added[nei] -= 1; level -= 1;
	neis=igraph_i_adjlist_get(&allneis, nei);
	s=igraph_vector_size(neis);
	for (i=0; i<s; i++) {
	  added[ (long int) VECTOR(*neis)[i] ] -= 1;
	}
	while (!igraph_vector_empty(&adjverts) && 
	       igraph_vector_tail(&adjverts)==nei) {
	  igraph_vector_pop_back(&adjverts);
	  igraph_vector_pop_back(&adjverts);
	}
      }
      
    } /* while */

    /* clear the added vector */
    added[father] -= 1;
    subg[father] = 0;
    neis=igraph_i_adjlist_get(&allneis, father);
    s=igraph_vector_size(neis);
    for (i=0; i<s; i++) {
      added[ (long int) VECTOR(*neis)[i] ] -= 1;
    }

  } /* for father */

  Free(added);
  Free(subg);
  igraph_vector_destroy(&vids);
  igraph_vector_destroy(&adjverts);
  igraph_i_adjlist_destroy(&alloutneis);
  igraph_i_adjlist_destroy(&allneis);
  igraph_stack_destroy(&stack);
  IGRAPH_FINALLY_CLEAN(7);
  return 0;
}

/**
 * \function igraph_motifs_randesu_estimate
 * \brief Estimate the total number of motifs in a graph
 * 
 * </para><para>
 * This function is useful for large graphs for which it is not
 * feasible to count all the different motifs, because there is very
 * many of them.
 *
 * </para><para>
 * The total number of motifs is estimated by taking a sample of
 * vertices and counts all motifs in which these vertices are
 * included. (There is also a \c cut_prob parameter which gives the
 * probabilities to cut a branch of the search tree.)
 *
 * </para><para> 
 * Directed motifs will be counted in directed graphs and undirected
 * motifs in undirected graphs.
 *
 * \param graph The graph object to study.
 * \param est Pointer to an integer type, the result will be stored
 *        here.
 * \param size The size of the motif to look for.
 * \param cut_prob Vector giving the probabilities to cut a branch of
 *        the search tree and omit counting the motifs in that branch.
 *        It contains a probability for each level. Supply \c size
 *        zeros here to count all the motifs in the sample.
 * \param sample_size The number of vertices to use as the
 *        sample. This parameter is only used if the \c parsample
 *        argument is a null pointer.
 * \param parsample Either pointer to an initialized vector or a null
 *        pointer. If a vector then the vertex ids in the vector are
 *        used as a sample. If a null pointer then the \c sample_size
 *        argument is used to create a sample of vertices drawn with
 *        uniform probability.
 * \return Error code.
 * \sa \ref igraph_motifs_randesu(), \ref igraph_motifs_randesu_no().
 * 
 * Time complexity: TODO.
 */

int igraph_motifs_randesu_estimate(const igraph_t *graph, igraph_integer_t *est,
				   int size, const igraph_vector_t *cut_prob, 
				   igraph_integer_t sample_size, 
				   const igraph_vector_t *parsample) {

  long int no_of_nodes=igraph_vcount(graph);
  igraph_vector_t neis;
    
  igraph_vector_t vids;		/* this is G */
  igraph_vector_t adjverts;	/* this is V_E */
  igraph_stack_t stack;		/* this is S */
  long int *added;
  igraph_vector_t *sample;
  long int sam;
  long int i;

  added=Calloc(no_of_nodes, long int);
  if (added==0) {
    IGRAPH_ERROR("Cannot find motifs", IGRAPH_ENOMEM);
  }
  IGRAPH_FINALLY(igraph_free, added);

  IGRAPH_VECTOR_INIT_FINALLY(&vids, 0);
  IGRAPH_VECTOR_INIT_FINALLY(&adjverts, 0);
  IGRAPH_CHECK(igraph_stack_init(&stack, 0));
  IGRAPH_FINALLY(igraph_stack_destroy, &stack);
  IGRAPH_VECTOR_INIT_FINALLY(&neis, 0);

  if (parsample==0) {
    sample=Calloc(1, igraph_vector_t);
    if (sample==0) {
      IGRAPH_ERROR("Cannot estimate motifs", IGRAPH_ENOMEM);
    }
    IGRAPH_FINALLY(igraph_free, sample);
    IGRAPH_VECTOR_INIT_FINALLY(sample, 0);  
    IGRAPH_CHECK(igraph_random_sample(sample, 0, no_of_nodes-1, sample_size));
  } else {
    sample=(igraph_vector_t*)parsample;
    sample_size=igraph_vector_size(sample);
  }

  *est=0;

  for (sam=0; sam<sample_size; sam++) {
    long int father=VECTOR(*sample)[sam];
    long int level, s;

    IGRAPH_ALLOW_INTERRUPTION();

    /* init G */
    igraph_vector_clear(&vids); level=0;
    IGRAPH_CHECK(igraph_vector_push_back(&vids, father));
    added[father] += 1; level += 1;
    
    /* init V_E */
    igraph_vector_clear(&adjverts);
    IGRAPH_CHECK(igraph_neighbors(graph, &neis, father, IGRAPH_ALL));
    s=igraph_vector_size(&neis);
    for (i=0; i<s; i++) {
      long int nei=VECTOR(neis)[i];
      if (!added[nei]) {
	IGRAPH_CHECK(igraph_vector_push_back(&adjverts, nei));
	IGRAPH_CHECK(igraph_vector_push_back(&adjverts, father));
      }
      added[nei] += 1;
    }
    
    /* init S */
    igraph_stack_clear(&stack);

    while (level != 1 || !igraph_vector_empty(&adjverts)) {
      igraph_real_t cp=VECTOR(*cut_prob)[level];
      
      if (level==size-1) {
	s=igraph_vector_size(&adjverts)/2;
	for (i=0; i<s; i++) {
	  if (cp!=0 && RNG_UNIF01() < cp) { continue; }
	  (*est) += 1;
	}
      }

      if (level < size-1 && 
	  !igraph_vector_empty(&adjverts) && 
	  (cp==0 || RNG_UNIF01() > cp)) {
	/* yes, step down */
	long int neifather=igraph_vector_pop_back(&adjverts);
	long int nei=igraph_vector_pop_back(&adjverts);
	IGRAPH_CHECK(igraph_vector_push_back(&vids, nei));
	added[nei] += 1; level += 1;

	IGRAPH_CHECK(igraph_stack_push(&stack, neifather));
	IGRAPH_CHECK(igraph_stack_push(&stack, nei));
	IGRAPH_CHECK(igraph_stack_push(&stack, level));

	IGRAPH_CHECK(igraph_neighbors(graph, &neis, nei, IGRAPH_ALL));
	s=igraph_vector_size(&neis);
	for (i=0; i<s; i++) {
	  long int nei2=VECTOR(neis)[i];
	  if (!added[nei2]) {
	    IGRAPH_CHECK(igraph_vector_push_back(&adjverts, nei2));
	    IGRAPH_CHECK(igraph_vector_push_back(&adjverts, nei));
	  }
	  added[nei2] += 1;
	}
      } else {
	/* no, step back */
	long int nei, neifather;
	while (!igraph_stack_empty(&stack) &&
	       level==igraph_stack_top(&stack)-1) {
	  igraph_stack_pop(&stack);
	  nei=igraph_stack_pop(&stack);
	  neifather=igraph_stack_pop(&stack);
	  igraph_vector_push_back(&adjverts, nei);
	  igraph_vector_push_back(&adjverts, neifather);
	}

	nei=igraph_vector_pop_back(&vids);
	added[nei] -= 1; level -= 1;
	IGRAPH_CHECK(igraph_neighbors(graph, &neis, nei, IGRAPH_ALL));
	s=igraph_vector_size(&neis);
	for (i=0; i<s; i++) {
	  added[ (long int) VECTOR(neis)[i] ] -= 1;
	}
	while (!igraph_vector_empty(&adjverts) && 
	       igraph_vector_tail(&adjverts)==nei) {
	  igraph_vector_pop_back(&adjverts);
	  igraph_vector_pop_back(&adjverts);
	}
      }
      
    } /* while */

    /* clear the added vector */
    added[father] -= 1;
    IGRAPH_CHECK(igraph_neighbors(graph, &neis, father, IGRAPH_ALL));
    s=igraph_vector_size(&neis);
    for (i=0; i<s; i++) {
      added[ (long int) VECTOR(neis)[i] ] -= 1;
    }

  } /* for father */

  (*est) *= ((double)no_of_nodes/sample_size);
  (*est) /= size;

  if (parsample==0) {
    igraph_vector_destroy(sample);
    Free(sample);
    IGRAPH_FINALLY_CLEAN(2);
  }

  Free(added);
  igraph_vector_destroy(&vids);
  igraph_vector_destroy(&adjverts);
  igraph_stack_destroy(&stack);
  igraph_vector_destroy(&neis);
  IGRAPH_FINALLY_CLEAN(5);
  return 0;
}

/**
 * \function igraph_motifs_randesu_no
 * \brief Count the total number of motifs in a graph
 *
 * </para><para> 
 * This function counts the total number of motifs in a graph without
 * assigning isomorphism classes to them. 
 *
 * </para><para> 
 * Directed motifs will be counted in directed graphs and undirected
 * motifs in undirected graphs.
 *
 * \param graph The graph object to study.
 * \param no Pointer to an integer type, the result will be stored
 *        here. 
 * \param size The size of the motifs to count.
 * \param cut_prob Vector giving the probabilities that a branch of
 *        the search tree will be cut at a given level.
 * \return Error code.
 * \sa \ref igraph_motifs_randesu(), \ref
 *     igraph_motifs_randesu_estimate(). 
 * 
 * Time complexity: TODO.
 */

int igraph_motifs_randesu_no(const igraph_t *graph, igraph_integer_t *no,
			     int size, const igraph_vector_t *cut_prob) {

  long int no_of_nodes=igraph_vcount(graph);
  igraph_vector_t neis;
    
  igraph_vector_t vids;		/* this is G */
  igraph_vector_t adjverts;	/* this is V_E */
  igraph_stack_t stack;		/* this is S */
  long int *added;
  long int father;
  long int i;

  added=Calloc(no_of_nodes, long int);
  if (added==0) {
    IGRAPH_ERROR("Cannot find motifs", IGRAPH_ENOMEM);
  }
  IGRAPH_FINALLY(igraph_free, added);

  IGRAPH_VECTOR_INIT_FINALLY(&vids, 0);
  IGRAPH_VECTOR_INIT_FINALLY(&adjverts, 0);
  IGRAPH_CHECK(igraph_stack_init(&stack, 0));
  IGRAPH_FINALLY(igraph_stack_destroy, &stack);
  IGRAPH_VECTOR_INIT_FINALLY(&neis, 0);

  *no=0;

  for (father=0; father<no_of_nodes; father++) {
    long int level, s;

    IGRAPH_ALLOW_INTERRUPTION();

    /* init G */
    igraph_vector_clear(&vids); level=0;
    IGRAPH_CHECK(igraph_vector_push_back(&vids, father));
    added[father] += 1; level += 1;
    
    /* init V_E */
    igraph_vector_clear(&adjverts);
    IGRAPH_CHECK(igraph_neighbors(graph, &neis, father, IGRAPH_ALL));
    s=igraph_vector_size(&neis);
    for (i=0; i<s; i++) {
      long int nei=VECTOR(neis)[i];
      if (!added[nei]) {
	IGRAPH_CHECK(igraph_vector_push_back(&adjverts, nei));
	IGRAPH_CHECK(igraph_vector_push_back(&adjverts, father));
      }
      added[nei] += 1;
    }
    
    /* init S */
    igraph_stack_clear(&stack);

    while (level != 1 || !igraph_vector_empty(&adjverts)) {
      igraph_real_t cp=VECTOR(*cut_prob)[level];
      
      if (level==size-1) {
	s=igraph_vector_size(&adjverts)/2;
	for (i=0; i<s; i++) {
	  if (cp!=0 && RNG_UNIF01() < cp) { continue; }
	  (*no) += 1;
	}
      }

      if (level < size-1 && 
	  !igraph_vector_empty(&adjverts) && 
	  (cp==0 || RNG_UNIF01() > cp)) {
	/* yes, step down */
	long int neifather=igraph_vector_pop_back(&adjverts);
	long int nei=igraph_vector_pop_back(&adjverts);
	IGRAPH_CHECK(igraph_vector_push_back(&vids, nei));
	added[nei] += 1; level += 1;

	IGRAPH_CHECK(igraph_stack_push(&stack, neifather));
	IGRAPH_CHECK(igraph_stack_push(&stack, nei));
	IGRAPH_CHECK(igraph_stack_push(&stack, level));

	IGRAPH_CHECK(igraph_neighbors(graph, &neis, nei, IGRAPH_ALL));
	s=igraph_vector_size(&neis);
	for (i=0; i<s; i++) {
	  long int nei2=VECTOR(neis)[i];
	  if (!added[nei2]) {
	    IGRAPH_CHECK(igraph_vector_push_back(&adjverts, nei2));
	    IGRAPH_CHECK(igraph_vector_push_back(&adjverts, nei));
	  }
	  added[nei2] += 1;
	}
      } else {
	/* no, step back */
	long int nei, neifather;
	while (!igraph_stack_empty(&stack) &&
	       level==igraph_stack_top(&stack)-1) {
	  igraph_stack_pop(&stack);
	  nei=igraph_stack_pop(&stack);
	  neifather=igraph_stack_pop(&stack);
	  igraph_vector_push_back(&adjverts, nei);
	  igraph_vector_push_back(&adjverts, neifather);
	}

	nei=igraph_vector_pop_back(&vids);
	added[nei] -= 1; level -= 1;
	IGRAPH_CHECK(igraph_neighbors(graph, &neis, nei, IGRAPH_ALL));
	s=igraph_vector_size(&neis);
	for (i=0; i<s; i++) {
	  added[ (long int) VECTOR(neis)[i] ] -= 1;
	}
	while (!igraph_vector_empty(&adjverts) && 
	       igraph_vector_tail(&adjverts)==nei) {
	  igraph_vector_pop_back(&adjverts);
	  igraph_vector_pop_back(&adjverts);
	}
      }
      
    } /* while */

    /* clear the added vector */
    added[father] -= 1;
    IGRAPH_CHECK(igraph_neighbors(graph, &neis, father, IGRAPH_ALL));
    s=igraph_vector_size(&neis);
    for (i=0; i<s; i++) {
      added[ (long int) VECTOR(neis)[i] ] -= 1;
    }

  } /* for father */

  *no /= size;

  Free(added);
  igraph_vector_destroy(&vids);
  igraph_vector_destroy(&adjverts);
  igraph_stack_destroy(&stack);
  igraph_vector_destroy(&neis);
  IGRAPH_FINALLY_CLEAN(5);
  return 0;
}


syntax highlighted by Code2HTML, v. 0.9.1