// QuiXML Copyright (c) 2003 Sean O'Dell // QuiXML is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // QuiXML is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with QuiXML; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // $Id: quixml.c,v 1.10 2003/09/01 01:06:09 sdodell Exp $ // $Log: quixml.c,v $ // Revision 1.10 2003/09/01 01:06:09 sdodell // see changelog // // Revision 1.9 2003/08/31 22:11:24 sdodell // see changelog // // Revision 1.8 2003/08/31 07:28:45 sdodell // see changelog // // Revision 1.6 2003/08/30 07:49:32 sdodell // see changelog // // Revision 1.5 2003/08/28 09:09:15 sdodell // // see changelog // // Revision 1.3 2003/08/28 02:32:34 sdodell // // see changelog // // Revision 1.2 2003/08/27 06:05:53 sdodell // // added cvs keywords // // uncomment this to print debugging messages //#define PRINT_TRACING #include #include #include // module and class type values VALUE quixml_module = Qnil; VALUE quixml_classmod = Qnil; // print tracing macros #ifdef PRINT_TRACING #define TRACE_PRINT()fprintf(stderr, "TRACE: %s, %s, line %d\n", __FUNCTION__, __FILE__, __LINE__) #include // prints the status of various resource limits void trace_rlimit_print(int resource, char* resource_name) { struct rlimit rl; if(!getrlimit(resource, &rl)) fprintf(stderr, "RLIMIT %s: %u of %u max\n", resource_name, rl.rlim_cur, rl.rlim_max); } #define TRACE_RLIMIT_PRINT(resource)trace_rlimit_print(resource, #resource) #else #define TRACE_PRINT() #define TRACE_RLIMIT_PRINT(resource) #endif // PRINT_TRACING // for efficiency, we create some values in advance // and mark them as global so the garbage collector doesn't remove them // the excessive macros are to prevent any sort of global name-clashing // there's a goto somewhere around here, too // c is an *exciting* language // common strings #define DECLARE_QUIXML_STR(strname) \ VALUE quixml_##strname##_str #define REGISTER_QUIXML_STR(strname) \ quixml_##strname##_str = rb_str_new2(#strname); \ rb_global_variable(&quixml_##strname##_str); #define QUIXML_STR(strname) \ quixml_##strname##_str // declare string id's // they must be also initialized DECLARE_QUIXML_STR(name); DECLARE_QUIXML_STR(attributes); DECLARE_QUIXML_STR(children); DECLARE_QUIXML_STR(text); // quixml constants #define DECLARE_QUIXML_CONST(constname, value) \ const int quixml_##constname##_const = value #define REGISTER_QUIXML_CONST(constname) \ rb_define_const(quixml_module, #constname, INT2NUM(quixml_##constname##_const)); #define QUIXML_CONST(constname) \ quixml_##constname##_const // declare int constants for bit flags // they must be also initialized DECLARE_QUIXML_CONST(STRIP_START, 1); DECLARE_QUIXML_CONST(STRIP_END, 2); DECLARE_QUIXML_CONST(KEEP_MIXED, 4); DECLARE_QUIXML_CONST(NEWLINE_TEXT, 8); DECLARE_QUIXML_CONST(INDENT_TAGS, 16); // declare pre-mixed flags for various parsing/output styles DECLARE_QUIXML_CONST(PRETTY, 31); DECLARE_QUIXML_CONST(READABLE, 16); DECLARE_QUIXML_CONST(PERFECT, 0); DECLARE_QUIXML_CONST(CONSERVATIVE, 4); //---------------------------------------------------------------------- // string buffer handling functions #define BUFFER_BLOCK_SIZE 128 struct quixml_buffer { int buffer_size, buffer_index; char* buffer; }; // initialize the buffer struct // WARNING: this structure MUST be allocated with ALLOCA_N void quixml_init_buffer(struct quixml_buffer* buffer) { // allocate in a CG-friendly way buffer->buffer = ALLOC_N(char, BUFFER_BLOCK_SIZE); buffer->buffer_size = BUFFER_BLOCK_SIZE; buffer->buffer_index = 0; buffer->buffer[0] = '\0'; } // set the buffer back to empty void quixml_zero_buffer(struct quixml_buffer* buffer) { buffer->buffer_index = 0; buffer->buffer[0] = '\0'; } // append a data buffer void quixml_append_buffer(struct quixml_buffer* buffer, const char* data, int len) { int max_copy_len, new_size; char* newbuffer; int index; // exit if some crazy values come in if (!data || len <= 0) return; // calculate the maximum length the current buffer can take max_copy_len = (buffer->buffer_size - buffer->buffer_index) - 1; // if there isn't enough space if (len > max_copy_len) { // calculate a new string buffer size that is a multiple of BUFFER_BLOCK_SIZE new_size = buffer->buffer_index + len + 1; new_size = (new_size - (new_size % BUFFER_BLOCK_SIZE)) + BUFFER_BLOCK_SIZE; // realloc the string buffer using a GC-friendly macro newbuffer = ALLOC_N(char, new_size); memcpy(newbuffer, buffer->buffer, buffer->buffer_index); buffer->buffer = newbuffer; buffer->buffer_size = new_size; } // copy the new data to the end of the buffer memcpy(&buffer->buffer[buffer->buffer_index], data, len); // increment the buffer index buffer->buffer_index += len; //~ // just in case, give the end a NULL character buffer->buffer[buffer->buffer_index] = '\0'; } // append a string void quixml_append2_buffer(struct quixml_buffer* buffer, const char* data) { quixml_append_buffer(buffer, data, data ? strlen(data) : 0); } //---------------------------------------------------------------------- //---------------------------------------------------------------------- // QUIXML::TREE.INITIALIZE //---------------------------------------------------------------------- //---------------------------------------------------------------------- //~ !start //~ !group quixml_tree_class //~ !name new //~ !syntax ( < rootnode > ) //~ !return aNewQuiXMLTree //~ !para //~ Creates a new QuiXML tree object. If rootnode is provided, it will be used as //~ the root node for the tree, otherwise the root node will be empty (nil). //~ !code //~ tree = QuiXML::Tree.new //~ !end VALUE quixml_tree_initialize(int argc, VALUE *argv, VALUE self) { if (argc == 0) rb_iv_set(self, "@root", Qnil); else if (argc == 1) rb_iv_set(self, "@root", argv[0]); else rb_raise(rb_eException, "too many arguments to QuiXML::new"); return self; } //---------------------------------------------------------------------- //---------------------------------------------------------------------- // QUIXML::TREE.PARSE // Depends largely on expat to perform the parsing. Uses an array of // VALUEs to track the current level of node nesting as it parses. //---------------------------------------------------------------------- //---------------------------------------------------------------------- // the maximum number of nested nodes // this should be more than enough for any XML buffer #define MAX_PARSE_DEPTH 128 // user data for the expat parser // it uses an array MAX_PARSE_DEPTH long to track the node nesting // it also builds a string buffer for the text of the current // node it is working on struct quixml_parse_user_data { int node_count, node_index; VALUE* nodes; int flags; struct quixml_buffer buffer; }; // the beginning of a new xml node void quixml_start_element(void* user_data, const char* name, const char** attrs) { struct quixml_parse_user_data* parse_user_data; VALUE parent_node = Qnil; VALUE node, attributes, children, text; VALUE attrname_value, attrvalue_value, attrtrans_value; const char* attrname, * attrvalue; parse_user_data = (struct quixml_parse_user_data*)user_data; if (!parse_user_data) return; // when a new element starts parsing, the index should be equal // to the total max nesting allowed, otherwise something incremented // it too far and that's bad news if (parse_user_data->node_index > parse_user_data->node_count) { rb_raise(rb_eException, "this XML buffer is nested too deeply; %d node maximum", parse_user_data->node_count); return; } // if the node index is greater than 0, this must be a child node, so get the parent if (parse_user_data->node_index > 0) parent_node = parse_user_data->nodes[parse_user_data->node_index - 1]; // create a current node to work with and protect from garbage collection // until the node end marker is found node = rb_hash_new(); // the user data tracks which node in the nesting array we are currently working with. // once started, the node is set to where the current array position is, then the // index is incremented so our node sits in a parent position parse_user_data->nodes[parse_user_data->node_index] = node; // now increment the index parse_user_data->node_index++; // give the new node its name property rb_hash_aset(node, QUIXML_STR(name), rb_str_new2(name)); // if there is a parent if (parent_node != Qnil) { // get the children array children = rb_hash_aref(parent_node, QUIXML_STR(children)); // push the current node onto the parent's children array rb_ary_push(children, node); // since this new node has a parent (is nested) // there might be a string buffer being partially built // that belongs to the parent, so we have to assign the // text now to it // get the parent's text property text = rb_hash_aref(parent_node, QUIXML_STR(text)); // if the string buffer has any data, append if (parse_user_data->buffer.buffer_index > 0) text = rb_str_cat(text, parse_user_data->buffer.buffer, parse_user_data->buffer.buffer_index); // zero out the string buffer because the new node will need it quixml_zero_buffer(&parse_user_data->buffer); } // create the attributes property attributes = rb_hash_new(); rb_hash_aset(node, QUIXML_STR(attributes), attributes); // create the text property text = rb_str_new2(""); rb_hash_aset(node, QUIXML_STR(text), text); // create the children property children = rb_ary_new(); rb_hash_aset(node, QUIXML_STR(children), children); // process attributes if (attrs && *attrs) { // for every attribute, set in the attributes hash for the current node while(*attrs) { attrname = *attrs; attrs++; attrvalue = *attrs; attrname_value = rb_str_new2(attrname); attrvalue_value = rb_str_new2(attrvalue); // yield to allow the caller to transmute string data into real Ruby objects if (rb_block_given_p()) { // create the arguments to the block before yielding attrtrans_value = rb_ary_new(); rb_ary_push(attrtrans_value, attrname_value); rb_ary_push(attrtrans_value, attrvalue_value); // replace the attribute string value with the returned object attrvalue_value = rb_yield(attrtrans_value); } rb_hash_aset(attributes, attrname_value, attrvalue_value); if (*attrs) attrs++; } } } // the end of the current xml node void quixml_end_element(void* user_data, const char *name) { struct quixml_parse_user_data* parse_user_data; VALUE node, text, children; char* text_ptr; int text_len; parse_user_data = (struct quixml_parse_user_data*)user_data; if (!parse_user_data) return; // if the current node index is less then or equal to zero, // something decremented the index too much if (parse_user_data->node_index <= 0) { rb_raise(rb_eException, "unexpected node ending marker"); return; } // since we're finishing with this node, back up the node index // so the nesting position can be re-used by any siblings parse_user_data->node_index--; node = parse_user_data->nodes[parse_user_data->node_index]; // get the children array children = rb_hash_aref(node, QUIXML_STR(children)); // get the text property text = rb_hash_aref(node, QUIXML_STR(text)); // if the string buffer has any data, append if (parse_user_data->buffer.buffer_index > 0) text = rb_str_cat(text, parse_user_data->buffer.buffer, parse_user_data->buffer.buffer_index); // zero out the string buffer because we are done with it quixml_zero_buffer(&parse_user_data->buffer); // get the c string so we can trim where needed text_ptr = STR2CSTR(text); text_len = strlen(text_ptr); // if STRIP_END is flagged if ( (parse_user_data->flags&QUIXML_CONST(STRIP_END)) ) { // decrease the string length until non-whitespace is encountered while (text_len > 0 && isspace(text_ptr[text_len - 1])) text_len--; text_ptr[text_len] = '\0'; } // if STRIP_START is flagged if ( (parse_user_data->flags&QUIXML_CONST(STRIP_START)) ) { // increment the string pointer until non-whitespace is encountered // since we're effectively shortening the string, decrement the length as well while (isspace(*text_ptr)) { text_ptr++; text_len--; } } // replace the text string for the current node with our new, trimmed string rb_hash_aset(node, QUIXML_STR(text), rb_str_new(text_ptr, text_len)); } // append string data for the current xml node void quixml_element_data(void* user_data, const char* data, int len) { struct quixml_parse_user_data* parse_user_data; VALUE node, children; int max_copy_len, new_size; parse_user_data = (struct quixml_parse_user_data*)user_data; if (!parse_user_data) return; // if the current node index is less then or equal to zero, // something decremented the index too much if (parse_user_data->node_index <= 0) { rb_raise(rb_eException, "node index is too low"); return; } // if KEEP_MIXED is not set, we must ignore if there are already children if (! (parse_user_data->flags&QUIXML_CONST(KEEP_MIXED)) ) { // get the current node we're working with node = parse_user_data->nodes[parse_user_data->node_index - 1]; // get the children node children = rb_hash_aref(node, QUIXML_STR(children)); // skip incoming data if (RARRAY(children)->len > 0) return; } quixml_append_buffer(&parse_user_data->buffer, data, len); } // parse an XML buffer and return a node VALUE quixml_parse_xml_buffer(int argc, VALUE *argv) { int depth = 0; XML_Parser parser; char* xml_ptr; struct quixml_parse_user_data* parse_user_data; VALUE rootnode; // assert ( xml < flags > ) syntax if (argc < 1) rb_raise(rb_eException, "too few parameters"); if (argc > 2) rb_raise(rb_eException, "too many parameters"); // extract the c string buffer // and return nil if NULL xml_ptr = STR2CSTR(rb_obj_as_string(argv[0])); if (!xml_ptr) return Qnil; // create our user data on the stack, so GC will watch over it parse_user_data = ALLOCA_N(struct quixml_parse_user_data, 1); // initialize flags if (argc > 1) parse_user_data->flags = NUM2INT(argv[1]); else parse_user_data->flags = QUIXML_CONST(CONSERVATIVE); // prepare the user data used for this parse parse_user_data->node_index = 0; parse_user_data->node_count = MAX_PARSE_DEPTH; // create an array of VALUES used to manage node nestings parse_user_data->nodes = ALLOCA_N(VALUE, MAX_PARSE_DEPTH); if (!parse_user_data->nodes) rb_raise(rb_eException, "could not allocate memory for node array"); parse_user_data->nodes[0] = Qnil; // first node will be root, so clear just in case // create a string buffer quixml_init_buffer(&parse_user_data->buffer); // instantiate an expat parser parser = XML_ParserCreate(NULL); XML_SetUserData(parser, parse_user_data); XML_SetElementHandler(parser, quixml_start_element, quixml_end_element); XML_SetCharacterDataHandler(parser, quixml_element_data); if (!XML_Parse(parser, xml_ptr, strlen(xml_ptr), !0)) { rb_raise(rb_eStandardError, "QuiXML parse error: %s at string buffer line %d", XML_ErrorString(XML_GetErrorCode(parser)), XML_GetCurrentLineNumber(parser) ); return Qnil; } XML_ParserFree(parser); // the first node in the array is the top (root) node return parse_user_data->nodes[0]; } //~ !start //~ !group quixml_tree_instance //~ !name parse //~ !syntax ( string <, flags > ) { | name, value | block } //~ !return theRootNode //~ !para //~ Parses a string buffer which should contain only valid XML and returns //~ the tree's new root node. If a //~ block is provided, it is called for every attribute encountered, //~ passing its name and current string value and will set the internal //~ value of the attribute to the last object returned by the block. //~ //~ Optional parameter flags determines how XML is parsed. QuiXML::CONSERVATIVE is default. //~ See How QuiXML Parses and Outputs XML. //~ !code //~ tree = QuiXML::Tree.new //~ tree.parse(File::readlines("file.xml")) do | name, value | //~ case name //~ when "id" //~ value = value.to_i //~ end //~ value //~ end //~ !end VALUE quixml_tree_parse(int argc, VALUE *argv, VALUE self) { VALUE rootnode; // parse the xml buffer given rootnode = quixml_parse_xml_buffer(argc, argv); // replace our root node with the new parsed tree rb_iv_set(self, "@root", rootnode); return rootnode; } //---------------------------------------------------------------------- //---------------------------------------------------------------------- // QUIXML::TREE.GET/SET_ROOT //---------------------------------------------------------------------- //---------------------------------------------------------------------- //~ !start //~ !group quixml_tree_instance //~ !name root //~ !syntax //~ !return theRootNode //~ !para //~ Returns the hash object stored at the root node. //~ !code //~ tree = QuiXML::parse(File::readlines("file.xml")) //~ tree.root["children"] << QuiXML::create_node //~ !end VALUE quixml_tree_get_root(VALUE self) { return rb_iv_get(self, "@root"); } //~ !start //~ !group quixml_tree_instance //~ !name root= //~ !syntax //~ !return theRootNode //~ !para //~ Sets the root node to an arbitrary object, which should be a hash object of a //~ structure QuiXML recognizes. //~ !code //~ tree = QuiXML::Tree.new //~ tree.root = QuiXML::create_node //~ !end VALUE quixml_tree_set_root(VALUE self, VALUE rootnode) { return rb_iv_set(self, "@root", rootnode); } //---------------------------------------------------------------------- //---------------------------------------------------------------------- // QUIXML::TREE.TO_XML //---------------------------------------------------------------------- //---------------------------------------------------------------------- // helper struct used when build xml string buffer // WARNING: this structure MUST be allocated with ALLOCA_N struct quixml_make_xml_helper { int level; int flags; struct quixml_buffer buffer; }; // simple char-to-string encoding unit struct quixml_encoding { char entity; const char * encoding; }; // table of xml encodings struct quixml_encoding quixml_encoding_table[] = { '\'', "'", '\"', """, '&', "&", '<', "<", '>', ">", 0, NULL }; // utility function to perform a ruby concatenation with XML encoding void quixml_concat_xml_buffer(struct quixml_buffer* buffer, const char* source) { const char* head_ptr, * tail_ptr; struct quixml_encoding *encoding_table; head_ptr = source; if (!head_ptr) return; // set up the tail pointer tail_ptr = head_ptr; // iterate through all the characters in the source string while (*head_ptr) { // set up an encoding table iteration pointer encoding_table = &quixml_encoding_table[0]; // iterate for each entry in the table // test each entry against the current character // until a ZERO entity is found in the table marking the end of the table while (encoding_table->entity) { // if the current character matches an entry if (*head_ptr == encoding_table->entity) { // append to the result string any characters skipped // by previous non-matching tests quixml_append_buffer(buffer, tail_ptr, head_ptr - tail_ptr); // now append the encoding string itself quixml_append2_buffer(buffer, encoding_table->encoding); // advance the tail to where the next head will be when we break tail_ptr = &head_ptr[1]; break; } // if no match (thus no loop break), walk through the table encoding_table++; } // advance the head to test the next character head_ptr++; } // when the head pointer hits a NULL character append any characters we // skipped by the last sequence of non-matching tests quixml_append_buffer(buffer, tail_ptr, head_ptr - tail_ptr); } //---------------------------------------------------------------------- // called by quixml_iterate_ruby_tree VALUE quixml_iterate_ruby_attrs(VALUE attributes, VALUE helper_object) { VALUE name, value, trans; struct quixml_make_xml_helper *helper; // with each iteration, the hash's key/value pair is given as a 2-item array // so get the key name = rb_ary_entry(attributes, 0); // and get the value value = rb_ary_entry(attributes, 1); // make sure our key (name) is a string Check_Type(name, T_STRING); // extract the helper associated with this recursive task Data_Get_Struct(helper_object, struct quixml_make_xml_helper, helper); // yield to allow the caller to transmute Ruby objects into a proper string representation if (rb_block_given_p()) { // create the arguments to the block before yielding trans = rb_ary_new(); rb_ary_push(trans, name); rb_ary_push(trans, value); // replace the attribute object with the returned value value = rb_yield(trans); } // append the key=value string to the attributes line quixml_append2_buffer(&helper->buffer, " "); quixml_append2_buffer(&helper->buffer, STR2CSTR(name)); quixml_append2_buffer(&helper->buffer, "=\""); quixml_concat_xml_buffer(&helper->buffer, STR2CSTR(rb_obj_as_string(value))); quixml_append2_buffer(&helper->buffer, "\""); return Qtrue; } //---------------------------------------------------------------------- // called by quixml_make_xml VALUE quixml_iterate_ruby_tree(int node_index, VALUE node, VALUE helper_object) { struct quixml_make_xml_helper *helper; VALUE name, attributes, children, text; int index, count; // make sure the node is a hash Check_Type(node, T_HASH); // extract the helper associated with this recursive task Data_Get_Struct(helper_object, struct quixml_make_xml_helper, helper); // each node is a hash with a given set of entries, // type (or name), text (the string text of the node), // nodes (child nodes) // need to add attributes (as a hash) // extract those entries now name = rb_hash_aref(node, QUIXML_STR(name)); attributes = rb_hash_aref(node, QUIXML_STR(attributes)); children = rb_hash_aref(node, QUIXML_STR(children)); text = rb_hash_aref(node, QUIXML_STR(text)); // reset some vars back to nil, if they're empty (reduces code below) if (!*STR2CSTR(text)) text = Qnil; if (RARRAY(children)->len <= 0) children = Qnil; // the node must at least have a name Check_Type(name, T_STRING); if (node_index > 0 || (helper->flags&QUIXML_CONST(NEWLINE_TEXT)) ) { // if INDENT_TAGS if flagged if ( (helper->flags&QUIXML_CONST(INDENT_TAGS)) ) { // the helper tracks the output indentation level // add spaces to the beginning, 2 for each level for(index = 0 ; index < helper->level ; index++) quixml_append2_buffer(&helper->buffer, " "); } } // begin a new XML node quixml_append2_buffer(&helper->buffer, "<"); quixml_append2_buffer(&helper->buffer, STR2CSTR(name)); // call iterate on attributes // for each pass, call quixml_iterate_ruby_attrs if (attributes != Qnil) rb_iterate(rb_each, attributes, RUBY_METHOD_FUNC(quixml_iterate_ruby_attrs), helper_object); // if there is no text, and no children // simply close the entire node and return if (text == Qnil && children == Qnil) { quixml_append2_buffer(&helper->buffer, " />"); // newline when we're indenting if ( (helper->flags&QUIXML_CONST(INDENT_TAGS)) ) quixml_append2_buffer(&helper->buffer, "\n"); return Qtrue; } // the node marker output is complete here, so close quixml_append2_buffer(&helper->buffer, ">"); // if there is text, output it now if (text != Qnil) { Check_Type(text, T_STRING); quixml_concat_xml_buffer(&helper->buffer, STR2CSTR(text)); } // if there are child nodes if (children != Qnil) { // child nodes must be an array Check_Type(children, T_ARRAY); // increase the node level helper->level++; // if NEWLINE_TEXT is flagged, output newline after our text, since there are child elements if ( (helper->flags&QUIXML_CONST(NEWLINE_TEXT)) ) quixml_append2_buffer(&helper->buffer, "\n"); // for each child node, recurse into iterate_ruby_tree (us) again count = RARRAY(children)->len; for(index = 0 ; index < count ; index++) quixml_iterate_ruby_tree(index, RARRAY(children)->ptr[index], helper_object); // decrease the node level helper->level--; // if we had children and INDENT_TAGS if flagged if ( count > 0 && (helper->flags&QUIXML_CONST(INDENT_TAGS)) ) { // when iterate_ruby_tree returns, it ends in a newline // so we need to re-indent back to matching this node's start marker for(index = 0 ; index < helper->level ; index++) quixml_append2_buffer(&helper->buffer, " "); } } // output our closure marker quixml_append2_buffer(&helper->buffer, "buffer, STR2CSTR(name)); quixml_append2_buffer(&helper->buffer, ">"); // newline when we're indenting if ( (helper->flags&QUIXML_CONST(INDENT_TAGS)) ) quixml_append2_buffer(&helper->buffer, "\n"); return Qtrue; } //---------------------------------------------------------------------- //~ !start //~ !group quixml_tree_instance //~ !name to_xml //~ !syntax { | name, value | block } //~ !return aString //~ !para //~ Creates a valid XML string buffer which represents the entire data tree //~ stored internally starting at the root node. If a block is provided, //~ it is called for every attribute encountered, passing its name and current //~ value and will change the outputted value of the attribute to the string //~ representation of the last object returned by the block. //~ //~ Optional parameter flags determines how XML is generated. QuiXML::CONSERVATIVE is default. //~ See How QuiXML Parses and Outputs XML. //~ !code //~ print tree.to_xml(QuiXML::PRETTY) do | name, value | //~ case name //~ when "id" //~ value = value.to_s //~ end //~ value //~ end //~ !end VALUE quixml_tree_to_xml(int argc, VALUE *argv, VALUE self) { struct quixml_make_xml_helper* helper; VALUE rootnode, helper_object, xml; // assert ( < flags > ) syntax if (argc > 1) rb_raise(rb_eException, "too many parameters"); // get our root node rootnode = rb_iv_get(self, "@root"); if (rootnode == Qnil) return rb_str_new2(""); // create our helper on the stack, so GC will watch over it helper = ALLOCA_N(struct quixml_make_xml_helper, 1); // create helper helper->level = 0; quixml_init_buffer(&helper->buffer); // initialize flags if (argc > 0) helper->flags = NUM2INT(argv[0]); else helper->flags = QUIXML_CONST(CONSERVATIVE); // wrap helper into a generic ruby object helper_object = Data_Wrap_Struct(rb_cObject, 0, NULL, helper); // begin recursion, and return the resulting string // kept by the helper quixml_iterate_ruby_tree(0, rootnode, helper_object); // make a ruby string from the resulting xml buffer xml = rb_str_new(helper->buffer.buffer, helper->buffer.buffer_index); return xml; } //---------------------------------------------------------------------- //---------------------------------------------------------------------- // QUIXML::TREE.FIND_ALL/FIND_FIRST //---------------------------------------------------------------------- //---------------------------------------------------------------------- // data structure used during a sequence of tests struct quixml_find_data { int test_count, test_index; VALUE *tests; VALUE result; }; // initialize the find_data structure void quixml_init_find_data(struct quixml_find_data* find_data, VALUE* tests) { // for a variable-length sequence, get array of test values if (TYPE(*tests) == T_ARRAY) { find_data->tests = RARRAY(*tests)->ptr; find_data->test_count = RARRAY(*tests)->len; } // otherwise this is a single test else { find_data->tests = tests; find_data->test_count = 1; } // set our test index to zero to start find_data->test_index = 0; // we will push our full matches onto this result array find_data->result = rb_ary_new(); } // the ID Ruby is using for the case-equality method name "===" int quixml_id_case_eq; // return values when testing a single node #define QUIXML_NO_MATCH 0 #define QUIXML_MATCHES_SOFAR -1 #define QUIXML_MATCHES_FINAL 1 // test just one node // return QUIXML_MATCHES_SOFAR when the node matches but there are more tests in the tree to perform // if it's a full match, return QUIXML_MATCHES_FINAL // otherwise return QUIXML_NO_MATCH int quixml_test_node(struct quixml_find_data* find_data, VALUE node) { VALUE name, test, attrname, attrtest, attributes, attrvalue; // the current test index is too high if (find_data->test_index >= find_data->test_count) return QUIXML_NO_MATCH; // get the node's name name = rb_hash_aref(node, QUIXML_STR(name)); if (name == Qnil) return QUIXML_NO_MATCH; // get the current test test = find_data->tests[find_data->test_index]; if (test == Qnil) return QUIXML_NO_MATCH; // perform a node name/attribute test if (TYPE(test) == T_ARRAY) { if (RARRAY(test)->len != 3) { rb_raise(rb_eException, "attribute test must be a 3-item array"); return; } // retrieve all our test values attrtest = RARRAY(test)->ptr[2]; attrname = RARRAY(test)->ptr[1]; test = RARRAY(test)->ptr[0]; if (attrname == Qnil || test == Qnil) return QUIXML_NO_MATCH; // prepare the attribute value attributes = rb_hash_aref(node, QUIXML_STR(attributes)); attrvalue = attributes != Qnil ? rb_hash_aref(attributes, attrname) : Qnil; // if the node name matches, continue on to the attribute test if (rb_respond_to(test, quixml_id_case_eq) && RTEST(rb_funcall(test, quixml_id_case_eq, 1, name)) ) { // if both the attribute value and attribute test are nil (a valid match) if (attrtest == Qnil && attrvalue == Qnil) goto quixml_test_node_matches; // otherwise, also test the attribute value else if (rb_respond_to(attrtest, quixml_id_case_eq) && RTEST(rb_funcall(attrtest, quixml_id_case_eq, 1, attrvalue))) goto quixml_test_node_matches; } } // perform just a simple node name test else { // the very first node always matches "." if (find_data->test_index == 0 && TYPE(test) == T_STRING) { char* value = STR2CSTR(test); if (value && strcmp(value, ".") == 0) goto quixml_test_node_matches; } if (rb_respond_to(test, quixml_id_case_eq) && RTEST(rb_funcall(test, quixml_id_case_eq, 1, name))) goto quixml_test_node_matches; } // by default, return no-match return QUIXML_NO_MATCH; // I'm well aware of the anti-goto arguments quixml_test_node_matches: // if this is the last test in the sequence, this is a final match if (find_data->test_index == (find_data->test_count - 1)) { // add node to the result array rb_ary_push(find_data->result, node); return QUIXML_MATCHES_FINAL; } // otherwise, this is just a partial match so far else return QUIXML_MATCHES_SOFAR; } // when testing all children of a node, pass one of these // values to just_get_one #define QUIXML_GET_ALL 0 #define QUIXML_GET_FIRST 1 // test all children of a given node void quixml_test_children(struct quixml_find_data* find_data, VALUE node, int just_get_one) { VALUE children; int child_count, child_index; VALUE* children_ptr; int result; // the last test index was for this node's parent, so increment now find_data->test_index++; // test index is too high now if (find_data->test_index >= find_data->test_count) { find_data->test_index--; return; } // one requirement: there must be children children = rb_hash_aref(node, QUIXML_STR(children)); if (children == Qnil) { find_data->test_index--; return; } Check_Type(children, T_ARRAY); children_ptr = RARRAY(children)->ptr; child_count = RARRAY(children)->len; // iterate through each child element for (child_index = 0; child_index < child_count ; child_index++) { // test the child node result = quixml_test_node(find_data, children_ptr[child_index]); // if we're only getting the first match, stop iterating now if (result == QUIXML_MATCHES_FINAL && just_get_one) break; // else if this node is just a partial match, test all of its children as well else if (result == QUIXML_MATCHES_SOFAR) quixml_test_children(find_data, children_ptr[child_index], just_get_one); // since we don't care if the node matched fully or not at all at this point // we just keep iterating through all the children } // clean-up before returning find_data->test_index--; } //~ !start //~ !group quixml_tree_instance //~ !name find_all //~ !syntax ( < case < , case < , ... > > > ) { | node | block } //~ !return anArray //~ !para //~ Returns an array containing every node that matches the entire series of case-equality //~ tests. If no match is found, returns an empty array. If a block is given, it is called for //~ every matching node found, passing the node as a parameter. See Element Path Addressing. //~ !code //~ tree.find_all(".", ["account", "id", [0..200]]) do | node | //~ process_account(node) //~ end //~ !end VALUE quixml_tree_find_all(VALUE self, VALUE tests) { VALUE rootnode; struct quixml_find_data* find_data; // get our root node rootnode = rb_iv_get(self, "@root"); if (rootnode == Qnil) return Qnil; // create our find_data in a GC-friendly way find_data = ALLOCA_N(struct quixml_find_data, 1); // init the find_data structure quixml_init_find_data(find_data, &tests); // test the root node before iterating through its children if (quixml_test_node(find_data, rootnode) == QUIXML_MATCHES_SOFAR) quixml_test_children(find_data, rootnode, QUIXML_GET_ALL); // yield if (rb_block_given_p()) { int index, count; VALUE* vars; count = RARRAY(find_data->result)->len; vars = RARRAY(find_data->result)->ptr; for (index = 0 ; index < count ; index++) rb_yield(vars[index]); } // return the result array return find_data->result; } //~ !start //~ !group quixml_tree_instance //~ !name find_first //~ !syntax ( < case < , case < , ... > > > ) //~ !return aNode //~ !para //~ Returns the first node (a hash) which matches the entire series of //~ case-equality tests. If no match is found, returns nil. See //~ Element Path Addressing. //~ !code //~ node = tree.find_first(".", [ "account", "id", 42 ]) //~ process_account(node) if (node) //~ !end VALUE quixml_tree_find_first(VALUE self, VALUE tests) { VALUE rootnode, result; struct quixml_find_data* find_data; // get our root node rootnode = rb_iv_get(self, "@root"); if (rootnode == Qnil) return Qnil; // create our find_data in a GC-friendly way find_data = ALLOCA_N(struct quixml_find_data, 1); // init the find_data structure quixml_init_find_data(find_data, &tests); // test the root node before iterating through its children if (quixml_test_node(find_data, rootnode) == QUIXML_MATCHES_SOFAR) quixml_test_children(find_data, rootnode, QUIXML_GET_FIRST); // get the first element in the result array result = rb_ary_shift(find_data->result); // return just the first matching element, or perhaps nil return result; } //---------------------------------------------------------------------- //---------------------------------------------------------------------- // QUIXML::TREE.ATTRIBUTES_OF/CHILDREN_OF/TEXT_OF //---------------------------------------------------------------------- //---------------------------------------------------------------------- //~ !start //~ !group quixml_tree_instance //~ !name attributes_of //~ !syntax ( < case < , case < , ... > > > ) //~ !return theAttributes //~ !para //~ Returns the attributes (a hash) of the first node which matches the //~ entire series of case-equality tests. If no match is found, returns //~ nil. See Element Path Addressing. //~ !code //~ attrs = tree.attributes_of(".", ["account", "id", 42]) //~ attrs["date_processed"] = Time.now if (attrs) //~ !end VALUE quixml_tree_attributes_of(VALUE self, VALUE tests) { VALUE result; // get the first matching node result = quixml_tree_find_first(self, tests); // if a node returned and not nil, get its attributes if (result != Qnil) result = rb_hash_aref(result, QUIXML_STR(attributes)); // return the attributes, or perhaps nil return result; } //~ !start //~ !group quixml_tree_instance //~ !name children_of //~ !syntax ( < case < , case < , ... > > > ) //~ !return theChildren //~ !para //~ Returns the child nodes (an array) of the first node which matches the //~ entire series of case-equality tests. If no match is found, returns //~ nil. See Element Path Addressing. //~ !code //~ children = tree.children_of(".", ["account", "id", 42], "notes") //~ children << QuiXML::create_node("note") if (attrs) //~ !end VALUE quixml_tree_children_of(VALUE self, VALUE tests) { VALUE result; // get the first matching node result = quixml_tree_find_first(self, tests); // if a node returned and not nil, get its children if (result != Qnil) result = rb_hash_aref(result, QUIXML_STR(children)); // return the children, or perhaps nil return result; } //~ !start //~ !group quixml_tree_instance //~ !name text_of //~ !syntax ( < case < , case < , ... > > > ) //~ !return theText //~ !para //~ Returns the text content (a string) of the first node which matches the //~ entire series of case-equality tests. If no match is found, returns //~ nil. See Element Path Addressing. //~ !code //~ content = tree.text_of(".", ["account", "id", 42], "name") //~ print ("account name: #{content}\n") //~ !end VALUE quixml_tree_text_of(VALUE self, VALUE tests) { VALUE result; // get the first matching node result = quixml_tree_find_first(self, tests); // if a node returned and not nil, get its text if (result != Qnil) result = rb_hash_aref(result, QUIXML_STR(text)); // return the text, or perhaps nil return result; } //---------------------------------------------------------------------- //---------------------------------------------------------------------- // QUIXML::PARSE //---------------------------------------------------------------------- //---------------------------------------------------------------------- //~ !start //~ !group quixml_module //~ !name parse //~ !syntax ( string <, flags > ) { | name, value | block } //~ !return aNewQuiXMLTree //~ !para //~ Parses a string buffer which should contain only valid XML and returns //~ a new QuiXML::Tree object. If a block is //~ provided, it is called for every attribute encountered, passing its name //~ and current string value and will set the internal value of the attribute //~ to the last object returned by the block. //~ //~ Optional parameter flags determines how XML is parsed. QuiXML::CONSERVATIVE is default. //~ See How QuiXML Parses and Outputs XML. //~ !code //~ tree = QuiXML::parse(File::readlines("file.xml")) do | name, value | //~ case name //~ when "id" //~ value = value.to_i //~ end //~ value //~ end //~ !end VALUE quixml_parse(int argc, VALUE *argv, VALUE self) { VALUE tree; // assert ( xml < flags > ) syntax if (argc < 1) rb_raise(rb_eException, "too few parameters"); if (argc > 2) rb_raise(rb_eException, "too many parameters"); // create a new quixml::tree class instance tree = rb_class_new_instance (0, NULL, quixml_classmod); // call its parser quixml_tree_parse(argc, argv, tree); return tree; } //---------------------------------------------------------------------- //---------------------------------------------------------------------- // QUIXML::WITH_NODE //---------------------------------------------------------------------- //---------------------------------------------------------------------- //~ !start //~ !group quixml_module //~ !name with_node //~ !syntax ( node ) { | tree | block } //~ !return aNewQuiXMLTree //~ !para //~ Creates a new QuiXML::Tree object using the given node as its root, and //~ then passes the new tree as a parameter to the block given. //~ !code //~ node = tree.find_first(".", ["account", "id", 42]) //~ QuiXML::with_node(node) do | sub_tree | //~ sub_tree.find_all("notes", "note") do | note_node | //~ print "#{note_node['text']}\n" //~ end //~ end //~ !end VALUE quixml_with_node(VALUE self, VALUE node) { VALUE tree; // with_node must have a block if (!rb_block_given_p()) rb_raise(rb_eException, "no block given"); // create a new quixml::tree class instance tree = rb_class_new_instance (1, &node, quixml_classmod); rb_yield(tree); return tree; } //---------------------------------------------------------------------- //---------------------------------------------------------------------- // QUIXML::CREATE_NODE //---------------------------------------------------------------------- //---------------------------------------------------------------------- //~ !start //~ !group quixml_module //~ !name create_node //~ !syntax ( name ) //~ !return aNewNode //~ !para //~ Creates a new, empty node which has the name given. This node is a hash //~ containing native Ruby data types for its attributes, children and text content. //~ !code //~ tree = QuiXML::Tree.new //~ tree.root = QuiXML::create_node //~ !end VALUE quixml_create_node(VALUE self, VALUE name) { VALUE node, attributes, text, children; // create a new node node = rb_hash_new(); // give the new node its name property rb_hash_aset(node, QUIXML_STR(name), rb_str_dup(name)); // create the attributes property attributes = rb_hash_new(); rb_hash_aset(node, QUIXML_STR(attributes), attributes); // create the text property text = rb_str_new2(""); rb_hash_aset(node, QUIXML_STR(text), text); // create the children property children = rb_ary_new(); rb_hash_aset(node, QUIXML_STR(children), children); return node; } //---------------------------------------------------------------------- //---------------------------------------------------------------------- // QUIXML::PARSE_NODE //---------------------------------------------------------------------- //---------------------------------------------------------------------- //~ !start //~ !group quixml_module //~ !name parse_node //~ !syntax ( string <, flags > ) { | name, value | block } //~ !return aNewNode //~ !para //~ Parses a string buffer which should contain only valid XML and returns //~ a new top (root) node for the tree as a hash containing all the basic //~ node properties (name, //~ attributes, children and text). If a block is //~ provided, it is called for every attribute encountered, passing its name //~ and current string value and will set the internal value of the attribute //~ to the last object returned by the block. //~ //~ Optional parameter flags determines how XML is parsed. QuiXML::CONSERVATIVE is default. //~ See How QuiXML Parses and Outputs XML. //~ !code //~ children = tree.children_of(".", ["account", "id", 42], "ext_data") //~ children << QuiXML::parse_node(xml_buffer) if (children) //~ !end VALUE quixml_parse_node(int argc, VALUE *argv, VALUE self) { VALUE rootnode; // parse the xml buffer given and return the root node return quixml_parse_xml_buffer(argc, argv); } void Init_quixml() { // register our global string id's REGISTER_QUIXML_STR(name); REGISTER_QUIXML_STR(attributes); REGISTER_QUIXML_STR(children); REGISTER_QUIXML_STR(text); // the id of the name used to call the case // equality method quixml_id_case_eq = rb_intern("==="); // the main module quixml_module = rb_define_module("QuiXML"); // register parse/print constants as bit flags REGISTER_QUIXML_CONST(STRIP_START); REGISTER_QUIXML_CONST(STRIP_END); REGISTER_QUIXML_CONST(KEEP_MIXED); REGISTER_QUIXML_CONST(NEWLINE_TEXT); // register pre-mixed flags for various parsing/output styles REGISTER_QUIXML_CONST(PRETTY); REGISTER_QUIXML_CONST(READABLE); REGISTER_QUIXML_CONST(PERFECT); REGISTER_QUIXML_CONST(CONSERVATIVE); // register the quixml module instance functions rb_define_module_function(quixml_module, "parse", quixml_parse, -1); rb_define_module_function(quixml_module, "with_node", quixml_with_node, 1); rb_define_module_function(quixml_module, "create_node", quixml_create_node, 1); rb_define_module_function(quixml_module, "parse_node", quixml_parse_node, -1); // register quixml::tree class quixml_classmod = rb_define_class_under(quixml_module, "Tree", rb_cObject); // register the quixml::tree class instance functions rb_define_method(quixml_classmod, "initialize", quixml_tree_initialize, -1); rb_define_method(quixml_classmod, "parse", quixml_tree_parse, -1); rb_define_method(quixml_classmod, "root", quixml_tree_get_root, 0); rb_define_method(quixml_classmod, "root=", quixml_tree_set_root, 1); rb_define_method(quixml_classmod, "to_xml", quixml_tree_to_xml, -1); rb_define_method(quixml_classmod, "find_all", quixml_tree_find_all, -2); rb_define_method(quixml_classmod, "find_first", quixml_tree_find_first, -2); rb_define_method(quixml_classmod, "attributes_of", quixml_tree_attributes_of, -2); rb_define_method(quixml_classmod, "children_of", quixml_tree_children_of, -2); rb_define_method(quixml_classmod, "text_of", quixml_tree_text_of, -2); }