/* -*- mode: C -*- */ /* IGraph R package. Copyright (C) 2006 Gabor Csardi MTA RMKI, Konkoly-Thege Miklos st. 29-33, Budapest 1121, Hungary This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "igraph.h" #include "config.h" #include /* isspace */ #include /* isnan */ #include #include "memory.h" #include /* va_start & co */ #if HAVE_LIBXML == 1 #include #include /* TODO: proper error handling */ typedef struct igraph_i_graphml_attribute_record_t { const char *id; /* GraphML id */ enum { I_GRAPHML_BOOLEAN, I_GRAPHML_INTEGER, I_GRAPHML_LONG, I_GRAPHML_FLOAT, I_GRAPHML_DOUBLE, I_GRAPHML_STRING, I_GRAPHML_UNKNOWN_TYPE } type; /* GraphML type */ igraph_i_attribute_record_t record; } igraph_i_graphml_attribute_record_t; int igraph_i_libxml2_read_callback(void *instream, char* buffer, int len) { int res; res=fread(buffer, 1, len, (FILE*)instream); if (res) return res; if (feof((FILE*)instream)) return 0; return -1; } int igraph_i_libxml2_close_callback(void *instream) { return 0; } struct igraph_i_graphml_parser_state { enum { START, INSIDE_GRAPHML, INSIDE_GRAPH, INSIDE_NODE, INSIDE_EDGE, INSIDE_KEY, INSIDE_DEFAULT, INSIDE_DATA, FINISH, UNKNOWN, ERROR } st; igraph_t *g; igraph_trie_t node_trie; igraph_strvector_t edgeids; igraph_vector_t edgelist; unsigned int prev_state; unsigned int unknown_depth; int index; igraph_bool_t successful, edges_directed, destroyed; igraph_trie_t v_names; igraph_vector_ptr_t v_attrs; igraph_trie_t e_names; igraph_vector_ptr_t e_attrs; igraph_trie_t g_names; igraph_vector_ptr_t g_attrs; xmlChar *data_key; igraph_attribute_elemtype_t data_type; char *error_message; }; void igraph_i_graphml_destroy_state(struct igraph_i_graphml_parser_state* state) { long int i; if (state->destroyed) return; state->destroyed=1; /* this is the easy part */ igraph_trie_destroy(&state->node_trie); igraph_strvector_destroy(&state->edgeids); igraph_trie_destroy(&state->v_names); igraph_trie_destroy(&state->e_names); igraph_trie_destroy(&state->g_names); igraph_vector_destroy(&state->edgelist); if (state->error_message) { free(state->error_message); } if (state->data_key) { free(state->data_key); } for (i=0; iv_attrs); i++) { igraph_i_graphml_attribute_record_t *rec=VECTOR(state->v_attrs)[i]; if (rec->record.type==IGRAPH_ATTRIBUTE_NUMERIC) { if (rec->record.value != 0) { igraph_vector_destroy((igraph_vector_t*)rec->record.value); Free(rec->record.value); } } else if (rec->record.type==IGRAPH_ATTRIBUTE_STRING) { if (rec->record.value != 0) { igraph_strvector_destroy((igraph_strvector_t*)rec->record.value); Free(rec->record.value); } } if (rec->id != 0) Free(rec->id); if (rec->record.name != 0) Free(rec->record.name); Free(rec); } for (i=0; ie_attrs); i++) { igraph_i_graphml_attribute_record_t *rec=VECTOR(state->e_attrs)[i]; if (rec->record.type==IGRAPH_ATTRIBUTE_NUMERIC) { if (rec->record.value != 0) { igraph_vector_destroy((igraph_vector_t*)rec->record.value); Free(rec->record.value); } } else if (rec->record.type==IGRAPH_ATTRIBUTE_STRING) { if (rec->record.value != 0) { igraph_strvector_destroy((igraph_strvector_t*)rec->record.value); Free(rec->record.value); } } if (rec->id != 0) Free(rec->id); if (rec->record.name != 0) Free(rec->record.name); Free(rec); } for (i=0; ig_attrs); i++) { igraph_i_graphml_attribute_record_t *rec=VECTOR(state->g_attrs)[i]; if (rec->record.type==IGRAPH_ATTRIBUTE_NUMERIC) { if (rec->record.value != 0) { igraph_vector_destroy((igraph_vector_t*)rec->record.value); Free(rec->record.value); } } else if (rec->record.type==IGRAPH_ATTRIBUTE_STRING) { if (rec->record.value != 0) { igraph_strvector_destroy((igraph_strvector_t*)rec->record.value); Free(rec->record.value); } } if (rec->id != 0) Free(rec->id); if (rec->record.name != 0) Free(rec->record.name); Free(rec); } igraph_vector_ptr_destroy(&state->v_attrs); igraph_vector_ptr_destroy(&state->e_attrs); igraph_vector_ptr_destroy(&state->g_attrs); IGRAPH_FINALLY_CLEAN(1); } void igraph_i_graphml_sax_handler_error(void *state0, const char* msg, ...) { struct igraph_i_graphml_parser_state *state= (struct igraph_i_graphml_parser_state*)state0; va_list ap; va_start(ap, msg); if (state->error_message == 0) state->error_message=Calloc(4096, char); state->successful=0; state->st=ERROR; vsnprintf(state->error_message, 4096, msg, ap); va_end(ap); } xmlEntityPtr igraph_i_graphml_sax_handler_get_entity(void *state0, const xmlChar* name) { return xmlGetPredefinedEntity(name); } void igraph_i_graphml_handle_unknown_start_tag(struct igraph_i_graphml_parser_state *state) { if (state->st != UNKNOWN) { state->prev_state=state->st; state->st=UNKNOWN; state->unknown_depth=1; } else state->unknown_depth++; } void igraph_i_graphml_sax_handler_start_document(void *state0) { struct igraph_i_graphml_parser_state *state= (struct igraph_i_graphml_parser_state*)state0; int ret; state->st=START; state->successful=1; state->edges_directed=0; state->destroyed=0; state->data_key=0; state->error_message=0; ret=igraph_vector_ptr_init(&state->v_attrs, 0); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_vector_ptr_destroy, &state->v_attrs); ret=igraph_vector_ptr_init(&state->e_attrs, 0); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_vector_ptr_destroy, &state->e_attrs); ret=igraph_vector_ptr_init(&state->g_attrs, 0); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_vector_ptr_destroy, &state->g_attrs); ret=igraph_vector_init(&state->edgelist, 0); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_vector_destroy, &state->edgelist); ret=igraph_trie_init(&state->node_trie, 1); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_trie_destroy, &state->node_trie); ret=igraph_strvector_init(&state->edgeids, 0); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_strvector_destroy, &state->edgeids); ret=igraph_trie_init(&state->v_names, 0); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_trie_destroy, &state->v_names); ret=igraph_trie_init(&state->e_names, 0); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_trie_destroy, &state->e_names); ret=igraph_trie_init(&state->g_names, 0); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_trie_destroy, &state->g_names); IGRAPH_FINALLY_CLEAN(9); IGRAPH_FINALLY(igraph_i_graphml_destroy_state, state); } void igraph_i_graphml_sax_handler_end_document(void *state0) { struct igraph_i_graphml_parser_state *state= (struct igraph_i_graphml_parser_state*)state0; long i, l; int r; igraph_i_attribute_record_t idrec, eidrec; const char *idstr="id"; if (!state->successful) return; if (state->index<0) { igraph_vector_ptr_t vattr, eattr, gattr; long int esize=igraph_vector_ptr_size(&state->e_attrs); r=igraph_vector_ptr_init(&vattr, igraph_vector_ptr_size(&state->v_attrs)+1); if (r) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, r); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_vector_ptr_destroy, &vattr); if (igraph_strvector_size(&state->edgeids) != 0) { esize++; } r=igraph_vector_ptr_init(&eattr, esize); if (r) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, r); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_vector_ptr_destroy, &eattr); r=igraph_vector_ptr_init(&gattr, igraph_vector_ptr_size(&state->g_attrs)); if (r) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, r); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_vector_ptr_destroy, &gattr); for (i=0; iv_attrs); i++) { igraph_i_graphml_attribute_record_t *graphmlrec= VECTOR(state->v_attrs)[i]; igraph_i_attribute_record_t *rec=&graphmlrec->record; if (rec->type == IGRAPH_ATTRIBUTE_NUMERIC) { igraph_vector_t *vec=(igraph_vector_t*)rec->value; long int origsize=igraph_vector_size(vec); long int nodes=igraph_trie_size(&state->node_trie); igraph_vector_resize(vec, nodes); for (l=origsize; ltype == IGRAPH_ATTRIBUTE_STRING) { igraph_strvector_t *strvec=(igraph_strvector_t*)rec->value; long int origsize=igraph_strvector_size(strvec); long int nodes=igraph_trie_size(&state->node_trie); igraph_strvector_resize(strvec, nodes); for (l=origsize; lnode_trie, (const igraph_strvector_t **)(&idrec.value)); VECTOR(vattr)[i]=&idrec; for (i=0; ie_attrs); i++) { igraph_i_graphml_attribute_record_t *graphmlrec= VECTOR(state->e_attrs)[i]; igraph_i_attribute_record_t *rec=&graphmlrec->record; if (rec->type == IGRAPH_ATTRIBUTE_NUMERIC) { igraph_vector_t *vec=(igraph_vector_t*)rec->value; long int origsize=igraph_vector_size(vec); long int edges=igraph_vector_size(&state->edgelist)/2; igraph_vector_resize(vec, edges); for (l=origsize; ltype == IGRAPH_ATTRIBUTE_STRING) { igraph_strvector_t *strvec=(igraph_strvector_t*)rec->value; long int origsize=igraph_strvector_size(strvec); long int edges=igraph_vector_size(&state->edgelist)/2; igraph_strvector_resize(strvec, edges); for (l=origsize; ledgeids) != 0) { long int origsize=igraph_strvector_size(&state->edgeids); eidrec.name=idstr; eidrec.type=IGRAPH_ATTRIBUTE_STRING; igraph_strvector_resize(&state->edgeids, igraph_vector_size(&state->edgelist)/2); for (; origsize < igraph_strvector_size(&state->edgeids); origsize++) { igraph_strvector_set(&state->edgeids, origsize, ""); } eidrec.value=&state->edgeids; VECTOR(eattr)[(long int)igraph_vector_ptr_size(&eattr)-1]=&eidrec; } for (i=0; ig_attrs); i++) { igraph_i_graphml_attribute_record_t *graphmlrec= VECTOR(state->g_attrs)[i]; igraph_i_attribute_record_t *rec=&graphmlrec->record; VECTOR(gattr)[i]=rec; } igraph_empty_attrs(state->g, 0, state->edges_directed, &gattr); igraph_add_vertices(state->g, igraph_trie_size(&state->node_trie), &vattr); igraph_add_edges(state->g, &state->edgelist, &eattr); igraph_vector_ptr_destroy(&vattr); igraph_vector_ptr_destroy(&eattr); igraph_vector_ptr_destroy(&gattr); IGRAPH_FINALLY_CLEAN(3); } igraph_i_graphml_destroy_state(state); } #define toXmlChar(a) (BAD_CAST(a)) #define fromXmlChar(a) ((char *)(a)) /* not the most elegant way... */ void igraph_i_graphml_add_attribute_key(const xmlChar** attrs, struct igraph_i_graphml_parser_state *state) { xmlChar **it; igraph_trie_t *trie=0; igraph_vector_ptr_t *ptrvector=0; long int id; int ret; igraph_i_graphml_attribute_record_t *rec= Calloc(1, igraph_i_graphml_attribute_record_t); if (!state->successful) return; if (rec==0) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, IGRAPH_ENOMEM); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } IGRAPH_FINALLY(igraph_free, rec); for (it=(xmlChar**)attrs; *it; it+=2) { if (xmlStrEqual(*it, toXmlChar("id"))) { const char *id=(const char*)(*(it+1)); rec->id=strdup(id); } else if (xmlStrEqual(*it, toXmlChar("attr.name"))) { const char *name=fromXmlChar(*(it+1)); rec->record.name=strdup(name); } else if (xmlStrEqual(*it, toXmlChar("attr.type"))) { if (xmlStrEqual(*(it+1), (xmlChar*)"boolean")) { rec->type=I_GRAPHML_BOOLEAN; rec->record.type=IGRAPH_ATTRIBUTE_NUMERIC; } else if (xmlStrEqual(*(it+1), toXmlChar("string"))) { rec->type=I_GRAPHML_STRING; rec->record.type=IGRAPH_ATTRIBUTE_STRING; } else if (xmlStrEqual(*(it+1), toXmlChar("float"))) { rec->type=I_GRAPHML_FLOAT; rec->record.type=IGRAPH_ATTRIBUTE_NUMERIC; } else if (xmlStrEqual(*(it+1), toXmlChar("double"))) { rec->type=I_GRAPHML_DOUBLE; rec->record.type=IGRAPH_ATTRIBUTE_NUMERIC; } else if (xmlStrEqual(*(it+1), toXmlChar("int"))) { rec->type=I_GRAPHML_INTEGER; rec->record.type=IGRAPH_ATTRIBUTE_NUMERIC; } else if (xmlStrEqual(*(it+1), toXmlChar("long"))) { rec->type=I_GRAPHML_LONG; rec->record.type=IGRAPH_ATTRIBUTE_NUMERIC; } else { igraph_error("Cannot parse GraphML file, unknown attribute type", __FILE__, __LINE__, IGRAPH_PARSEERROR); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file, unknown attribute type"); return; } } else if (xmlStrEqual(*it, toXmlChar("for"))) { /* graph, vertex or edge attribute? */ if (xmlStrEqual(*(it+1), toXmlChar("graph"))) { trie=&state->g_names; ptrvector=&state->g_attrs; } else if (xmlStrEqual(*(it+1), toXmlChar("node"))) { trie=&state->v_names; ptrvector=&state->v_attrs; } else if (xmlStrEqual(*(it+1), toXmlChar("edge"))) { trie=&state->e_names; ptrvector=&state->e_attrs; } else { igraph_error("Cannot parse GraphML file, unknown attribute type", __FILE__, __LINE__, IGRAPH_PARSEERROR); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file, unknown attribute type"); return; } } } if (trie == 0 && state->successful) { igraph_error("Cannot parse GraphML file, missing 'for' attribute", __FILE__, __LINE__, IGRAPH_PARSEERROR); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file, missing 'for' attribute"); return; } /* add to trie, attribues */ igraph_trie_get(trie, rec->id, &id); if (id != igraph_trie_size(trie)-1) { igraph_error("Cannot parse GraphML file, duplicate attribute", __FILE__, __LINE__, IGRAPH_PARSEERROR); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file, duplicate attribute"); return; } ret=igraph_vector_ptr_push_back(ptrvector, rec); if (ret) { igraph_error("Cannot read GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot read GraphML file"); return; } /* create the attribute values */ switch (rec->record.type) { igraph_vector_t *vec; igraph_strvector_t *strvec; case IGRAPH_ATTRIBUTE_NUMERIC: vec=Calloc(1, igraph_vector_t); if (vec==0) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, IGRAPH_ENOMEM); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } rec->record.value=vec; igraph_vector_init(vec, 0); break; case IGRAPH_ATTRIBUTE_STRING: strvec=Calloc(1, igraph_strvector_t); if (strvec==0) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, IGRAPH_ENOMEM); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } rec->record.value=strvec; igraph_strvector_init(strvec, 0); break; default: break; } IGRAPH_FINALLY_CLEAN(1); /* rec */ } void igraph_i_graphml_attribute_data_setup(struct igraph_i_graphml_parser_state *state, const xmlChar **attrs, igraph_attribute_elemtype_t type) { xmlChar **it; for (it=(xmlChar**)attrs; *it; it+=2) { if (xmlStrEqual(*it, toXmlChar("key"))) { if (state->data_key) { free(state->data_key); } state->data_key=xmlStrdup(*(it+1)); state->data_type=type; } else { /* ignore */ } } } void igraph_i_graphml_attribute_data_add(struct igraph_i_graphml_parser_state *state, const xmlChar *data) { const char *key=fromXmlChar(state->data_key); char *chardata; const xmlChar *end=xmlStrchr(data, (xmlChar) '<'); igraph_attribute_elemtype_t type=state->data_type; igraph_trie_t *trie=0; igraph_vector_ptr_t *ptrvector=0; igraph_i_graphml_attribute_record_t *graphmlrec; igraph_i_attribute_record_t *rec; long int recid; long int id=0; int ret; if (!state->successful) return; chardata=Calloc( (end-data)+1, char); if (chardata==0) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, IGRAPH_ENOMEM); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } memcpy(chardata, data, (end-data)*sizeof(char)); chardata[(end-data)]='\0'; switch (type) { case IGRAPH_ATTRIBUTE_GRAPH: trie=&state->g_names; ptrvector=&state->g_attrs; id=0; break; case IGRAPH_ATTRIBUTE_VERTEX: trie=&state->v_names; ptrvector=&state->v_attrs; id=igraph_trie_size(&state->node_trie)-1; /* hack */ break; case IGRAPH_ATTRIBUTE_EDGE: trie=&state->e_names; ptrvector=&state->e_attrs; id=igraph_vector_size(&state->edgelist)/2-1; /* hack */ break; default: /* impossible */ break; } igraph_trie_check(trie, key, &recid); if (recid < 0) { /* no such attribute key, issue a warning */ IGRAPH_WARNING("unknown attribute key in GraphML file, ignoring attribute"); Free(chardata); return; } graphmlrec=VECTOR(*ptrvector)[recid]; rec=&graphmlrec->record; switch (rec->type) { igraph_vector_t *vec; igraph_strvector_t *strvec; igraph_real_t num; long int s, i; case IGRAPH_ATTRIBUTE_NUMERIC: vec=(igraph_vector_t *)rec->value; s=igraph_vector_size(vec); if (id >= s) { ret=igraph_vector_resize(vec, id+1); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } for (i=s; ivalue; s=igraph_strvector_size(strvec); if (id >= s) { ret=igraph_strvector_resize(strvec, id+1); if (ret) { igraph_error("Cannot parse GraphML file", __FILE__, __LINE__, ret); igraph_i_graphml_sax_handler_error(state, "Cannot parse GraphML file"); return; } for (i=s;ist) { case START: /* If we are in the START state and received a graphml tag, * change to INSIDE_GRAPHML state. Otherwise, change to UNKNOWN. */ if (xmlStrEqual(name, toXmlChar("graphml"))) state->st=INSIDE_GRAPHML; else igraph_i_graphml_handle_unknown_start_tag(state); break; case INSIDE_GRAPHML: /* If we are in the INSIDE_GRAPHML state and received a graph tag, * change to INSIDE_GRAPH state if the state->index counter reached * zero (this is to handle multiple graphs in the same file). * Otherwise, change to UNKNOWN. */ if (xmlStrEqual(name, toXmlChar("graph"))) { if (state->index==0) { state->st=INSIDE_GRAPH; for (it=(xmlChar**)attrs; *it; it+=2) { if (xmlStrEqual(*it, toXmlChar("edgedefault"))) { if (xmlStrEqual(*(it+1), toXmlChar("directed"))) state->edges_directed=1; else if (xmlStrEqual(*(it+1), toXmlChar("undirected"))) state->edges_directed=0; } } } state->index--; } else if (xmlStrEqual(name, toXmlChar("key"))) { igraph_i_graphml_add_attribute_key(attrs, state); state->st=INSIDE_KEY; } else igraph_i_graphml_handle_unknown_start_tag(state); break; case INSIDE_KEY: /* If we are in the INSIDE_KEY state, check for default tag */ if (xmlStrEqual(name, toXmlChar("default"))) state->st=INSIDE_DEFAULT; else igraph_i_graphml_handle_unknown_start_tag(state); break; case INSIDE_DEFAULT: /* If we are in the INSIDE_DEFAULT state, every further tag will be unknown */ igraph_i_graphml_handle_unknown_start_tag(state); break; case INSIDE_GRAPH: /* If we are in the INSIDE_GRAPH state, check for node and edge tags */ if (xmlStrEqual(name, toXmlChar("edge"))) { id1=-1; id2=-1; directed=state->edges_directed; for (it=(xmlChar**)attrs; *it; it+=2) { if (xmlStrEqual(*it, toXmlChar("source"))) { igraph_trie_get(&state->node_trie, fromXmlChar(*(it+1)), &id1); } if (xmlStrEqual(*it, toXmlChar("target"))) { igraph_trie_get(&state->node_trie, fromXmlChar(*(it+1)), &id2); } if (xmlStrEqual(*it, toXmlChar("id"))) { long int edges=igraph_vector_size(&state->edgelist)/2+1; long int origsize=igraph_strvector_size(&state->edgeids); igraph_strvector_resize(&state->edgeids, edges); for (;origsize < edges-1; origsize++) { igraph_strvector_set(&state->edgeids, origsize, ""); } igraph_strvector_set(&state->edgeids, edges-1, fromXmlChar(*(it+1))); } } if (id1>=0 && id2>=0) { igraph_vector_push_back(&state->edgelist, id1); igraph_vector_push_back(&state->edgelist, id2); } else { igraph_i_graphml_sax_handler_error(state, "Edge with missing source or target encountered"); return; } state->st=INSIDE_EDGE; } else if (xmlStrEqual(name, toXmlChar("node"))) { for (it=(xmlChar**)attrs; *it; it+=2) { if (xmlStrEqual(*it, toXmlChar("id"))) { it++; igraph_trie_get(&state->node_trie, fromXmlChar(*it), &id1); break; } } state->st=INSIDE_NODE; } else if (xmlStrEqual(name, toXmlChar("data"))) { igraph_i_graphml_attribute_data_setup(state, attrs, IGRAPH_ATTRIBUTE_GRAPH); state->prev_state=state->st; state->st=INSIDE_DATA; } else igraph_i_graphml_handle_unknown_start_tag(state); break; case INSIDE_NODE: if (xmlStrEqual(name, toXmlChar("data"))) { igraph_i_graphml_attribute_data_setup(state, attrs, IGRAPH_ATTRIBUTE_VERTEX); state->prev_state=state->st; state->st=INSIDE_DATA; } break; case INSIDE_EDGE: if (xmlStrEqual(name, toXmlChar("data"))) { igraph_i_graphml_attribute_data_setup(state, attrs, IGRAPH_ATTRIBUTE_EDGE); state->prev_state=state->st; state->st=INSIDE_DATA; } break; default: break; } } void igraph_i_graphml_sax_handler_end_element(void *state0, const xmlChar* name) { struct igraph_i_graphml_parser_state *state= (struct igraph_i_graphml_parser_state*)state0; switch (state->st) { case INSIDE_GRAPHML: state->st=FINISH; break; case INSIDE_GRAPH: state->st=INSIDE_GRAPHML; break; case INSIDE_KEY: state->st=INSIDE_GRAPHML; break; case INSIDE_DEFAULT: state->st=INSIDE_KEY; break; case INSIDE_NODE: state->st=INSIDE_GRAPH; break; case INSIDE_EDGE: state->st=INSIDE_GRAPH; break; case INSIDE_DATA: state->st=state->prev_state; break; case UNKNOWN: state->unknown_depth--; if (!state->unknown_depth) state->st=state->prev_state; break; default: break; } } void igraph_i_graphml_sax_handler_chars(void* state0, const xmlChar* ch, int len) { struct igraph_i_graphml_parser_state *state= (struct igraph_i_graphml_parser_state*)state0; switch (state->st) { case INSIDE_KEY: case INSIDE_DEFAULT: break; case INSIDE_DATA: igraph_i_graphml_attribute_data_add(state, ch); break; default: // just ignore it break; } } static xmlSAXHandler igraph_i_graphml_sax_handler={ NULL, NULL, NULL, NULL, NULL, igraph_i_graphml_sax_handler_get_entity, NULL, NULL, NULL, NULL, NULL, NULL, igraph_i_graphml_sax_handler_start_document, igraph_i_graphml_sax_handler_end_document, igraph_i_graphml_sax_handler_start_element, igraph_i_graphml_sax_handler_end_element, NULL, igraph_i_graphml_sax_handler_chars, NULL, NULL, NULL, igraph_i_graphml_sax_handler_error, igraph_i_graphml_sax_handler_error, igraph_i_graphml_sax_handler_error, }; #endif int igraph_i_xml_escape(char* src, char** dest) { long int destlen=0; char *s, *d; for (s=src; *s; s++, destlen++) { if (*s == '&') destlen += 4; else if (*s == '<') destlen += 3; else if (*s == '>') destlen += 3; else if (*s == '"') destlen += 5; else if (*s == '\'') destlen += 5; } *dest=Calloc(destlen+1, char); if (!*dest) IGRAPH_ERROR("Not enough memory", IGRAPH_ENOMEM); for (s=src, d=*dest; *s; s++, d++) { switch (*s) { case '&': strcpy(d, "&"); d+=4; break; case '<': strcpy(d, "<"); d+=3; break; case '>': strcpy(d, ">"); d+=3; break; case '"': strcpy(d, """); d+=5; break; case '\'': strcpy(d, "'"); d+=5; break; default: *d = *s; } } *d=0; return 0; } /** * \ingroup loadsave * \function igraph_read_graph_graphml * \brief Reads a graph from a GraphML file. * * * GraphML is an XML-based file format for representing various types of * graphs. Currently only the most basic import functionality is implemented * in igraph: it can read GraphML files without nested graphs and hyperedges. * Attributes of the graph are loaded only if an attribute interface * is attached, ie. if you use igraph from R or Python. * \param graph Pointer to an uninitialized graph object. * \param instream A stream, it should be readable. * \param index If the GraphML file contains more than one graph, the one * specified by this index will be loaded. Indices start from * zero, so supply zero here if your GraphML file contains only * a single graph. * * \return Error code: * \c IGRAPH_PARSEERROR: if there is a * problem reading the file, or the file is syntactically * incorrect. * \c IGRAPH_UNIMPLEMENTED: the GraphML functionality was disabled * at compile-time */ int igraph_read_graph_graphml(igraph_t *graph, FILE *instream, int index) { #if HAVE_LIBXML == 1 xmlParserCtxtPtr ctxt; struct igraph_i_graphml_parser_state state; int res; char buffer[4096]; if (index<0) IGRAPH_ERROR("Graph index must be non-negative", IGRAPH_EINVAL); // Create a progressive parser context state.g=graph; state.index=index<0?0:index; ctxt=xmlCreateIOParserCtxt(&igraph_i_graphml_sax_handler, &state, igraph_i_libxml2_read_callback, igraph_i_libxml2_close_callback, instream, XML_CHAR_ENCODING_NONE); if (ctxt==NULL) IGRAPH_ERROR("Can't create progressive parser context", IGRAPH_PARSEERROR); // Parse the file while ((res=fread(buffer, 1, 4096, instream))>0) { xmlParseChunk(ctxt, buffer, res, 0); if (!state.successful) break; } xmlParseChunk(ctxt, buffer, res, 1); // Free the context xmlFreeParserCtxt(ctxt); if (!state.successful) { if (state.error_message != 0) IGRAPH_ERROR(state.error_message, IGRAPH_PARSEERROR); else IGRAPH_ERROR("Malformed GraphML file", IGRAPH_PARSEERROR); } if (state.index>=0) IGRAPH_ERROR("Graph index was too large", IGRAPH_EINVAL); return 0; #else IGRAPH_ERROR("GraphML support is disabled", IGRAPH_UNIMPLEMENTED); #endif } /** * \ingroup loadsave * \function igraph_write_graph_graphml * \brief Writes the graph to a file in GraphML format * * * GraphML is an XML-based file format for representing various types of * graphs. See the GraphML Primer (http://graphml.graphdrawing.org/primer/graphml-primer.html) * for detailed format description. * * \param graph The graph to write. * \param outstream The stream object to write to, it should be * writable. * \return Error code: * \c IGRAPH_EFILE if there is an error * writing the file. * * Time complexity: O(|V|+|E|) otherwise. All * file operations are expected to have time complexity * O(1). */ int igraph_write_graph_graphml(const igraph_t *graph, FILE *outstream) { int ret; igraph_integer_t l, vc; igraph_eit_t it; igraph_strvector_t gnames, vnames, enames; igraph_vector_t gtypes, vtypes, etypes; long int i; igraph_vector_t numv; igraph_strvector_t strv; ret=fprintf(outstream, "\n"); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); ret=fprintf(outstream, "\n"); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); ret=fprintf(outstream, "\n"); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); /* dump the elements if any */ IGRAPH_VECTOR_INIT_FINALLY(&numv, 1); IGRAPH_STRVECTOR_INIT_FINALLY(&strv, 1); IGRAPH_STRVECTOR_INIT_FINALLY(&gnames, 0); IGRAPH_STRVECTOR_INIT_FINALLY(&vnames, 0); IGRAPH_STRVECTOR_INIT_FINALLY(&enames, 0); IGRAPH_VECTOR_INIT_FINALLY(>ypes, 0); IGRAPH_VECTOR_INIT_FINALLY(&vtypes, 0); IGRAPH_VECTOR_INIT_FINALLY(&etypes, 0); igraph_i_attribute_get_info(graph, &gnames, >ypes, &vnames, &vtypes, &enames, &etypes); /* graph attributes */ for (i=0; i\n", name_escaped, name_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } else if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_NUMERIC) { ret=fprintf(outstream, " \n", name_escaped, name_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } Free(name_escaped); } /* vertex attributes */ for (i=0; i\n", name_escaped, name_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } else if (VECTOR(vtypes)[i] == IGRAPH_ATTRIBUTE_NUMERIC) { ret=fprintf(outstream, " \n", name_escaped, name_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } Free(name_escaped); } /* edge attributes */ for (i=0; i\n", name_escaped, name_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } else if (VECTOR(etypes)[i] == IGRAPH_ATTRIBUTE_NUMERIC) { ret=fprintf(outstream, " \n", name_escaped, name_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } Free(name_escaped); } ret=fprintf(outstream, " \n", (igraph_is_directed(graph)?"directed":"undirected")); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); /* Write the graph atributes before anything else */ for (i=0; i%g\n", name_escaped, VECTOR(numv)[0]); Free(name_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } } else if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_STRING) { char *s, *s_escaped; igraph_strvector_get(&gnames, i, &name); IGRAPH_CHECK(igraph_i_xml_escape(name, &name_escaped)); ret=fprintf(outstream, " ", name_escaped); Free(name_escaped); igraph_i_attribute_get_string_graph_attr(graph, name, &strv); igraph_strvector_get(&strv, 0, &s); IGRAPH_CHECK(igraph_i_xml_escape(s, &s_escaped)); ret=fprintf(outstream, "%s", s_escaped); Free(s_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); ret=fprintf(outstream, "\n"); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } } /* Let's dump the nodes first */ vc=igraph_vcount(graph); for (l=0; l\n", (long)l); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); for (i=0; i%g\n", name_escaped, VECTOR(numv)[0]); Free(name_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } } else if (VECTOR(vtypes)[i] == IGRAPH_ATTRIBUTE_STRING) { char *s, *s_escaped; igraph_strvector_get(&vnames, i, &name); IGRAPH_CHECK(igraph_i_xml_escape(name, &name_escaped)); ret=fprintf(outstream, " ", name_escaped); Free(name_escaped); igraph_i_attribute_get_string_vertex_attr(graph, name, igraph_vss_1(l), &strv); igraph_strvector_get(&strv, 0, &s); IGRAPH_CHECK(igraph_i_xml_escape(s, &s_escaped)); ret=fprintf(outstream, "%s", s_escaped); Free(s_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); ret=fprintf(outstream, "\n"); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } } ret=fprintf(outstream, " \n"); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } /* Now the edges */ IGRAPH_CHECK(igraph_eit_create(graph, igraph_ess_all(0), &it)); IGRAPH_FINALLY(igraph_eit_destroy, &it); while (!IGRAPH_EIT_END(it)) { igraph_integer_t from, to; char *name, *name_escaped; long int edge=IGRAPH_EIT_GET(it); igraph_edge(graph, edge, &from, &to); ret=fprintf(outstream, " \n", (long int)from, (long int)to); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); for (i=0; i%g\n", name_escaped, VECTOR(numv)[0]); Free(name_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } else if (VECTOR(etypes)[i] == IGRAPH_ATTRIBUTE_STRING) { char *s, *s_escaped; igraph_strvector_get(&enames, i, &name); IGRAPH_CHECK(igraph_i_xml_escape(name, &name_escaped)); ret=fprintf(outstream, " ", name_escaped); Free(name_escaped); igraph_i_attribute_get_string_edge_attr(graph, name, igraph_ess_1(edge), &strv); igraph_strvector_get(&strv, 0, &s); IGRAPH_CHECK(igraph_i_xml_escape(s, &s_escaped)); ret=fprintf(outstream, "%s", s_escaped); Free(s_escaped); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); ret=fprintf(outstream, "\n"); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); } } ret=fprintf(outstream, " \n"); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); IGRAPH_EIT_NEXT(it); } igraph_eit_destroy(&it); IGRAPH_FINALLY_CLEAN(1); ret=fprintf(outstream, " \n"); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); fprintf(outstream, "\n"); if (ret<0) IGRAPH_ERROR("Write failed", IGRAPH_EFILE); igraph_strvector_destroy(&gnames); igraph_strvector_destroy(&vnames); igraph_strvector_destroy(&enames); igraph_vector_destroy(>ypes); igraph_vector_destroy(&vtypes); igraph_vector_destroy(&etypes); igraph_vector_destroy(&numv); igraph_strvector_destroy(&strv); IGRAPH_FINALLY_CLEAN(8); return 0; }