/* nl-xml.c - newLISP XML interface
Copyright (C) 2007 Lutz Mueller
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "newlisp.h"
#include "protos.h"
#define XML_NONE 0
#define XML_TEXT 1
#define XML_CDATA 2
#define XML_COMMENT 3
#define XML_ELEMENT 4
int isWhiteSpaceStringN(char * source, int tagPos);
CELL * makeTagSymbolCell(char * tagStart, int tagLen);
void performXmlCallback(CELL * cell, char * start);
char * typeNames[] =
{
"none",
"TEXT",
"CDATA",
"COMMENT",
"ELEMENT"
};
CELL typeCells[5] = {{0}, {0}, {0}, {0}, {0}};
static char * xmlError;
static char xmlMsg[64];
static char * sourceOrg;
static char * source;
static SYMBOL * XMLcontext;
UINT optionsFlag;
#define OPTION_NO_OPTION 0
#define OPTION_NO_WHITESPACE 1
#define OPTION_NO_EMPTY_ATTRIBUTES 2
#define OPTION_NO_COMMENTS 4
#define OPTION_TAGS_TO_SYMBOLS 8
#define OPTION_SXML_ATTRIBUTES 16
typedef struct
{
char * name;
void * next;
} TAG_STACK;
TAG_STACK * tagStack = NULL;
CELL * xmlCallback = NULL;
/* setup type tage default cells, if done already just relink */
CELL * setupTypeTagCells(void)
{
int i;
/* if never done, initialize defaults */
if(typeCells[0].contents == 0)
for(i = 0; i < 5; i++)
{
typeCells[i].type = (i == 0) ? CELL_EXPRESSION : CELL_STRING;
typeCells[i].next = nilCell;
typeCells[i].aux = (i == 0) ? (UINT)nilCell : strlen(typeNames[i]) + 1;
typeCells[i].contents = (UINT)typeNames[i];
}
/* link cells in a list */
typeCells[0].contents = (UINT)&typeCells[1];
for(i = 1; i < 4; i++)
typeCells[i].next = &typeCells[i+1];
return(&typeCells[0]);
}
CELL * p_XMLparse(CELL * params)
{
CELL * result;
if(xmlCallback != NULL)
errorProc(ERR_NOT_REENTRANT);
params = getString(params, &source);
if(params != nilCell)
{
params = getInteger(params, &optionsFlag);
if(params != nilCell)
{
XMLcontext = getCreateContext(params, TRUE);
if(params->next != nilCell)
xmlCallback = params->next;
else
xmlCallback = NULL;
}
else
XMLcontext = currentContext;
}
else
optionsFlag = OPTION_NO_OPTION;
setupTypeTagCells();
xmlError = NULL;
sourceOrg = source;
deleteTagStack();
result = parseDoc();
deleteTagStack();
if(xmlError != NULL)
return nilCell;
else
return result;
}
CELL * p_XMLtypeTags(CELL * params)
{
int i;
CELL * cell;
if(params == nilCell)
return(copyCell(setupTypeTagCells()));
setupTypeTagCells();
for(i = 1; i < 5; i++)
{
cell = evaluateExpression(params);
memcpy(&typeCells[i], cell, sizeof(CELL));
params = params->next;
}
return(copyCell(setupTypeTagCells()));
}
CELL * p_XMLerror(CELL * params)
{
CELL * errorCell;
CELL * cell;
if(xmlError == NULL)
return(nilCell);
errorCell = getCell(CELL_EXPRESSION);
cell = stuffString(xmlError);
errorCell->contents = (UINT)cell;
cell->next = stuffInteger((UINT)(source - sourceOrg));
return errorCell;
}
void deleteTagStack(void)
{
TAG_STACK * oldTagStack;
while(tagStack != NULL)
{
oldTagStack = tagStack;
freeMemory(tagStack->name);
tagStack = tagStack->next;
freeMemory(oldTagStack);
}
}
CELL * parseDoc(void)
{
CELL * node;
CELL * lastNode;
int closingFlag = FALSE;
int tagPos;
lastNode = node = getCell(CELL_EXPRESSION);
while(!xmlError && !closingFlag)
{
if((tagPos = find("<", source)) == -1) break;
if(tagPos > 0)
{
if( (tagStack != NULL) || (node->contents != (UINT)nilCell))
{
if((optionsFlag & OPTION_NO_WHITESPACE) && isWhiteSpaceStringN(source, tagPos))
{;}
else lastNode = appendNode(lastNode, makeTextNode(XML_TEXT, stuffStringN(source, tagPos)));
}
source = source + tagPos;
}
if(strncmp(source, "<!DOCTYPE", 9) == 0)
{
parseDTD();
continue;
}
if(*source == '<' && *(source + 1) == '?')
{
parseProcessingInstruction();
continue;
}
if(memcmp(source, "<!--", 4) == 0)
{
if(optionsFlag & OPTION_NO_COMMENTS)
parseTag("-->");
else
lastNode = appendNode(lastNode, parseTag("-->"));
continue;
}
if(memcmp(source, "<![CDATA[", 9) == 0)
{
lastNode = appendNode(lastNode, parseTag("]]>"));
continue;
}
if(*source == '<' && *(source + 1) == '/')
{
closingFlag = TRUE;
parseClosing();
continue;
}
lastNode = appendNode(lastNode, parseTag(">"));
}
if(xmlError != NULL)
{
deleteList(node);
return nilCell;
}
return node;
}
void parseDTD(void)
{
int closeTag, squareTag;
int closePos = 0;
char * closeTagStr;
if((closeTag = find(">", source)) == -1)
{
xmlError = "error in DTD: expected '>'";
return;
}
squareTag = find("[", source);
if(squareTag != -1 && squareTag < closeTag)
closeTagStr = "]>";
else
closeTagStr = ">";
while(!xmlError)
{
if((closePos = find(closeTagStr, source)) == -1)
{
snprintf(xmlMsg, 63, "expected: %s", closeTagStr);
xmlError = xmlMsg;
return;
}
if(*(source + closePos - 1) != ']')
break;
source = source + closePos + strlen(closeTagStr);
}
source = source + closePos + strlen(closeTagStr);
return;
}
void parseProcessingInstruction(void)
{
int closeTag;
if((closeTag = find("?>", source)) == -1)
{
xmlError = "expecting closing tag sequence '?>'";
return;
}
source = source + closeTag + 2;
}
void parseClosing(void)
{
int closeTag;
char * tagName;
TAG_STACK * oldTagStack;
if((closeTag = find(">", source)) == -1)
{
xmlError = "missing closing >";
return;
}
if(tagStack == NULL)
{
xmlError = "closing tag has no opening";
return;
}
tagName = tagStack->name;
if(strncmp(source + 2, tagName, strlen(tagName)) != 0)
{
xmlError = "closing tag doesn't match";
return;
}
/* pop tagStack */
freeMemory(tagName);
oldTagStack = tagStack;
tagStack = tagStack->next;
freeMemory(oldTagStack);
source = source + closeTag + 1;
}
CELL * parseTag(char * closeTagStr)
{
char * newSrc;
char * tagStart;
int closeTag;
CELL * cell;
tagStart = source;
cell = NULL;
closeTag = find(closeTagStr, source);
if(*(source + closeTag - 1) == '/')
{
if(memcmp(closeTagStr,"]]>",3) != 0)
{
--closeTag;
closeTagStr = "/>";
}
}
if(closeTag == -1)
{
snprintf(xmlMsg, 63, "expected closing tag: %s", closeTagStr);
xmlError = xmlMsg;
return nilCell;
}
if(memcmp(source, "<!--", 4) == 0)
{
if(optionsFlag & OPTION_NO_COMMENTS)
cell = nilCell;
else
{
cell = stuffStringN(source + 4, closeTag - 4);
cell = makeTextNode(XML_COMMENT, cell);
}
}
if(memcmp(source, "<![CDATA[", 9) == 0)
{
cell = stuffStringN(source + 9, closeTag - 9);
cell = makeTextNode(XML_CDATA, cell);
}
if(*source == '<' && *(source + 1) == '/')
{
xmlError = "closing node has no opening";
return nilCell;
}
newSrc = source + closeTag + strlen(closeTagStr);
if(cell == NULL)
cell = parseNormalTag(source + closeTag, newSrc);
else
source = newSrc;
/* call back with closed tag expression found
and opening start and end of source of this
tag expression
*/
if(xmlCallback)
performXmlCallback(cell, tagStart);
return(cell);
}
void performXmlCallback(CELL * result, char * tagStart)
{
CELL * list;
CELL * cell;
CELL * next;
int errNo;
list = getCell(CELL_EXPRESSION);
list->contents = (UINT)copyCell(xmlCallback);
cell = getCell(CELL_QUOTE);
cell->contents = (UINT)copyCell(result);
cell->next = stuffInteger((UINT)(tagStart - sourceOrg));
next = cell->next;
next->next = stuffInteger((UINT)(source - tagStart));
((CELL*)list->contents)->next = cell;
pushResult(list);
if(!evaluateExpressionSafe(list, &errNo))
{
deleteTagStack();
longjmp(errorJump, errNo);
}
}
CELL * parseNormalTag(char * endSrc, char * newSrc)
{
char * tagStart;
int tagLen;
CELL * attributes;
CELL * childs;
CELL * tagCell;
TAG_STACK * tag;
++source; /* skip '/' */
while(*source <= ' ' && source < endSrc) ++source; /* skip whitespace */
tagStart = source;
tagLen = 0;
while(*source > ' ' && source < endSrc) ++source, ++tagLen; /* find tag end */
attributes = parseAttributes(endSrc);
if(optionsFlag & OPTION_SXML_ATTRIBUTES)
{
childs = (CELL*)attributes->contents;
if(! (childs == nilCell && (optionsFlag & OPTION_NO_EMPTY_ATTRIBUTES)))
{
attributes->contents = (UINT)stuffSymbol(atSymbol);
((CELL*)(attributes->contents))->next = childs;
}
}
if(xmlError)
return nilCell;
if(*source == '/' && *(source + 1) == '>')
{
source = newSrc;
if(optionsFlag & OPTION_TAGS_TO_SYMBOLS)
tagCell = makeTagSymbolCell(tagStart, tagLen);
else
tagCell = stuffStringN(tagStart, tagLen);
return makeElementNode(tagCell, attributes, getCell(CELL_EXPRESSION));
}
/* push tag on tagstack */
tag = (TAG_STACK*)allocMemory(sizeof(TAG_STACK));
tag->name = (char *)callocMemory(tagLen + 1);
memcpy(tag->name, tagStart, tagLen);
tag->next = tagStack;
tagStack = tag;
source = newSrc;
childs = parseDoc();
if(optionsFlag & OPTION_TAGS_TO_SYMBOLS)
tagCell = makeTagSymbolCell(tagStart, tagLen);
else
tagCell = stuffStringN(tagStart, tagLen);
return makeElementNode(tagCell, attributes, childs);
}
CELL * makeTagSymbolCell(char * tagStart, int tagLen)
{
char * name;
CELL * cell;
name = (char *)callocMemory(tagLen + 1);
memcpy(name, tagStart, tagLen);
cell = stuffSymbol(translateCreateSymbol(name, CELL_NIL, XMLcontext, 0));
freeMemory(name);
return(cell);
}
CELL * parseAttributes(char * endSrc)
{
CELL * attributes;
CELL * att;
CELL * cell;
CELL * lastAtt;
char * namePos;
char * valPos;
char quoteChar;
int nameLen, valLen;
attributes = getCell(CELL_EXPRESSION);
lastAtt = NULL;
while(!xmlError && source < endSrc)
{
while(*source <= ' ' && source < endSrc) source++; /* strip leading space */
namePos = source;
nameLen = 0;
while(*source > ' ' && *source != '=' && source < endSrc) source++, nameLen++; /* get end */
if(nameLen == 0) break;
while(*source <= ' ' && source < endSrc) source++; /* strip leading space */
if(*source != '=')
{
xmlError = "expected '=' in attributes";
deleteList(attributes);
return nilCell;
}
else source++;
while(*source <= ' ' && source < endSrc) source++; /* strip spaces */
if(*source != '\"' && *source != '\'')
{
xmlError = "attribute values must be delimited by \" or \' ";
deleteList(attributes);
return nilCell;
}
quoteChar = *source;
source++;
valPos = source;
valLen = 0;
while(*source != quoteChar && source < endSrc) source++, valLen++;
if(*source != quoteChar) valLen = -1;
else source++;
if(nameLen == 0 || valLen == -1)
{
xmlError = "incorrect attribute";
deleteList(attributes);
return nilCell;
}
att = getCell(CELL_EXPRESSION);
if(optionsFlag & OPTION_TAGS_TO_SYMBOLS)
cell = makeTagSymbolCell(namePos, nameLen);
else
cell = stuffStringN(namePos, nameLen);
cell->next = stuffStringN(valPos, valLen);
att->contents = (UINT)cell;
if(lastAtt == NULL)
attributes->contents = (UINT)att;
else
lastAtt->next = att;
lastAtt = att;
}
return attributes;
}
CELL * appendNode(CELL * node, CELL * newNode)
{
if(node->contents == (UINT)nilCell)
node->contents = (UINT)newNode;
else
node->next = newNode;
return newNode;
}
CELL * makeTextNode(int type, CELL * contents)
{
CELL * newNode;
CELL * cell;
/* unwrap text node if nil xml-type-tag */
if(typeCells[type].type == CELL_NIL)
return(contents);
newNode = getCell(CELL_EXPRESSION);
cell = copyCell(&typeCells[type]);
newNode->contents = (UINT)cell;
cell->next = contents;
return newNode;
}
CELL * makeElementNode(CELL * tagNode, CELL * attributesNode, CELL * childrenNode)
{
CELL * newNode;
CELL * cell;
/* unwrap children node, if nil in xml-type-tag */
if(typeCells[XML_ELEMENT].type == CELL_NIL)
{
cell = childrenNode;
childrenNode = (CELL *)childrenNode->contents;
cell->contents = (UINT)nilCell;
deleteList(cell);
}
newNode = getCell(CELL_EXPRESSION);
if(typeCells[XML_ELEMENT].type == CELL_NIL)
newNode->contents = (UINT)tagNode;
else
{
cell = copyCell(&typeCells[XML_ELEMENT]);
newNode->contents = (UINT)cell;
cell->next = tagNode;
}
if( (attributesNode->contents == (UINT)nilCell) &&
(optionsFlag & OPTION_NO_EMPTY_ATTRIBUTES))
{
tagNode->next = childrenNode;
deleteList(attributesNode);
}
else
{
tagNode->next = attributesNode;
attributesNode->next = childrenNode;
}
return newNode;
}
int find(char * key, char * source)
{
char * ptr;
ptr = strstr(source, key);
if(ptr == NULL) return -1;
return(ptr - source);
}
int isWhiteSpaceStringN(char * source, int tagPos)
{
while(tagPos--) if((unsigned char)*source++ > 32) return(FALSE);
return(TRUE);
}
/* eof */
syntax highlighted by Code2HTML, v. 0.9.1