Index: configure ================================================================== --- configure +++ configure @@ -759,10 +759,11 @@ enable_dtd enable_ns enable_ucmd enable_tdomalloc enable_lessns +enable_dtdvalidation ' ac_precious_vars='build_alias host_alias target_alias CC @@ -1405,10 +1406,11 @@ --enable-ns build with XML namespace support (default: on) --enable-unknown enable built-in unknown command (default: off) --enable-tdomalloc build with the tDOM allocator (default: off) --enable-lessns build with lower limit for XML ns declarations (default: off) + --enable-dtdvalidation build with valiation features (default: on) Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-tcl directory containing tcl configuration @@ -8891,10 +8893,38 @@ else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable dtd valiation" >&5 +$as_echo_n "checking whether to enable dtd valiation... " >&6; } + # Check whether --enable-dtdvalidation was given. 
+if test "${enable_dtdvalidation+set}" = set; then : + enableval=$enable_dtdvalidation; tcl_ok=$enableval +else + tcl_ok=yes +fi + + + if test "${enable_dtdvalidation+set}" = set; then + enableval="$enable_dtdvalidation" + tcl_ok=$enableval + else + tcl_ok=yes + fi + + if test "$tcl_ok" = "no" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + $as_echo "#define TDOM_NO_DTD_VALIDATION 1" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + fi + TDOMSHELL=tcldomsh Index: configure.ac ================================================================== --- configure.ac +++ configure.ac @@ -217,10 +217,11 @@ TDOM_ENABLE_DTD TDOM_ENABLE_NS TDOM_ENABLE_UNKNOWN TDOM_ENABLE_TDOMALLOC TDOM_ENABLE_LESS_NS +TDOM_ENABLE_DTD_VALIDATION TDOMSHELL=tcldomsh AC_SUBST(TDOMSHELL) TDOM_EXPORT_CONFIG Index: doc/domNode.xml ================================================================== --- doc/domNode.xml +++ doc/domNode.xml @@ -378,13 +378,14 @@

Similar to the way described above to inject literals in a secure way into the XPath expression using tcl variable references there is a syntax to inject element names from tcl variables. At every place where the XPath syntax allows a node test there could be a tcl variable reference (in any form), just the leading $ replaced with %. -This allows one to select nodes with 'strange' (invalid, according to the -appropriate XML production rule) node names which may be needed in -case of working with JSON data.

+This allows one to select nodes with 'strange' (invalid, according to +the appropriate XML production rule) node names which may be needed in +case of working with JSON data or if the DOM tree was built with +-ignorexmlns.

The option -namespaces expects a tcl list with prefix / namespace pairs as argument. If this option is not given, then any namespace prefix within the xpath expression will be first resolved against the list of prefix / namespace pairs set with the Index: extensions/tnc/tnc.c ================================================================== --- extensions/tnc/tnc.c +++ extensions/tnc/tnc.c @@ -2061,11 +2061,10 @@ fprintf (stderr, "error!!! - in TncProbeElementEnd: unknown content " "type: %d\n", stackelm.model->type); return 1; } } - /* *---------------------------------------------------------------------------- * * TncElementEndCommand -- Index: generic/dom.c ================================================================== --- generic/dom.c +++ generic/dom.c @@ -120,10 +120,151 @@ int depth; const char *baseURI; } domActiveBaseURI; + +#ifndef TDOM_NO_DTD_VALIDATION +/* The elements of TNC_Content carry exactly the same information + as expats XML_Content. But the element is identified by his + Tcl_HashEntry entry within the "tagNames" Hashtable (see TNC_Data) + and not the element name. This should be much more efficient. 
*/
+typedef struct TNC_cp TNC_Content;
+typedef struct TNC_elemAttInfo TNC_ElemAttInfo;
+
+struct TNC_cp
+{
+    enum XML_Content_Type   type;
+    enum XML_Content_Quant  quant;
+    Tcl_HashEntry          *nameId;       /* tagNames entry (NAME nodes) */
+    unsigned int            numchildren;
+    TNC_Content            *children;
+    TNC_ElemAttInfo        *attInfo;
+};
+
+typedef struct TNC_contentStack
+{
+    TNC_Content  *model;
+    int           activeChild;
+    int           deep;
+    int           alreadymatched;
+} TNC_ContentStack;
+
+typedef enum TNC_attType {
+    TNC_ATTTYPE_CDATA,
+    TNC_ATTTYPE_ID,
+    TNC_ATTTYPE_IDREF,
+    TNC_ATTTYPE_IDREFS,
+    TNC_ATTTYPE_ENTITY,
+    TNC_ATTTYPE_ENTITIES,
+    TNC_ATTTYPE_NMTOKEN,
+    TNC_ATTTYPE_NMTOKENS,
+    TNC_ATTTYPE_NOTATION,
+    TNC_ATTTYPE_ENUMERATION,
+} TNC_AttType;
+
+struct TNC_elemAttInfo
+{
+    Tcl_HashTable *attributes;
+    int            nrOfreq;
+    int            nrOfIdAtts;
+};
+
+typedef struct TNC_attDecl
+{
+    TNC_AttType    att_type;
+    char          *dflt;
+    int            isrequired;
+    Tcl_HashTable *lookupTable;   /* either NotationTypes or enum values */
+} TNC_AttDecl;
+
+typedef struct TNC_entityInfo
+{
+    int    is_notation;
+    char  *notationName;
+} TNC_EntityInfo;
+
+typedef Tcl_HashEntry TNC_NameId;
+
+#define TNC_INITCONTENTSTACKSIZE 512
+
+enum TNC_Error {   /* order must match message[] in TNC_ErrorString */
+    TNC_ERROR_NONE,
+    TNC_ERROR_DUPLICATE_ELEMENT_DECL,
+    TNC_ERROR_DUPLICATE_MIXED_ELEMENT,
+    TNC_ERROR_UNKNOWN_ELEMENT,
+    TNC_ERROR_EMPTY_ELEMENT,
+    TNC_ERROR_DISALLOWED_PCDATA,
+    TNC_ERROR_DISALLOWED_CDATA,
+    TNC_ERROR_NO_DOCTYPE_DECL,
+    TNC_ERROR_WRONG_ROOT_ELEMENT,
+    TNC_ERROR_NO_ATTRIBUTES,
+    TNC_ERROR_UNKNOWN_ATTRIBUTE,
+    TNC_ERROR_WRONG_FIXED_ATTVALUE,
+    TNC_ERROR_MISSING_REQUIRED_ATTRIBUTE,
+    TNC_ERROR_MORE_THAN_ONE_ID_ATT,
+    TNC_ERROR_ID_ATT_DEFAULT,
+    TNC_ERROR_DUPLICATE_ID_VALUE,
+    TNC_ERROR_UNKNOWN_ID_REFERRED,
+    TNC_ERROR_ENTITY_ATTRIBUTE,
+    TNC_ERROR_ENTITIES_ATTRIBUTE,
+    TNC_ERROR_ATT_ENTITY_DEFAULT_MUST_BE_DECLARED,
+    TNC_ERROR_NOTATION_REQUIRED,
+    TNC_ERROR_NOTATION_MUST_BE_DECLARED,
+    TNC_ERROR_IMPOSSIBLE_DEFAULT,
+    TNC_ERROR_ENUM_ATT_WRONG_VALUE,
+    TNC_ERROR_NMTOKEN_REQUIRED,
+    TNC_ERROR_NAME_REQUIRED,
+    TNC_ERROR_NAMES_REQUIRED,
+    TNC_ERROR_ELEMENT_NOT_ALLOWED_HERE,
+    TNC_ERROR_ELEMENT_CAN_NOT_END_HERE,
+    TNC_ERROR_ONLY_THREE_BYTE_UTF8,
+    TNC_ERROR_UNKNOWN_NODE_TYPE
+};
+/* Maps an enum TNC_Error code to its message; 0 (NULL) if out of range. */
+const char *
+TNC_ErrorString (int code)
+{
+    static const char *message[] = {   /* one entry per enum TNC_Error */
+        "No error.",
+        "Element declared more than once.",
+        "The same name must not appear more than once in \n\tone mixed-content declaration.",
+        "No declaration for this element.",
+        "Element is declared to be empty, but isn't.",
+        "PCDATA not allowed here.",
+        "CDATA section not allowed here.",
+        "No DOCTYPE declaration.",
+        "Root element doesn't match DOCTYPE name.",
+        "No attributes defined for this element.",
+        "Unknown attribute for this element.",
+        "Attribute value must match the FIXED default.",
+        "Required attribute missing.",
+        "Only one attribute with type ID allowed.",
+        "No default value allowed for attribute type ID.",
+        "ID attribute values must be unique within the document.",
+        "Unknown ID referred.",
+        "Attribute value has to be a unparsed entity.",
+        "Attribute value has to be a sequence of unparsed entities.",
+        "The defaults of attributes with type ENTITY or ENTITIES\nhas to be unparsed entities.",
+        "Attribute value has to be one of the allowed notations.",
+        "Every used NOTATION must be declared.",
+        "Attribute default is not one of the allowed values",
+        "Attribute hasn't one of the allowed values.",
+        "Attribute value has to be a NMTOKEN.",
+        "Attribute value has to be a Name.",
+        "Attribute value has to match production Names.",
+        "Element is not allowed here.",
+        "Element can not end here (required element(s) missing).",
+        "Can only handle UTF8 chars up to 3 bytes length.",
+        "Unknown or unexpected dom node type."
+    };
+    if (code >= 0 && (size_t) code < sizeof(message)/sizeof(message[0]))
+        return message[code];
+    return 0;
+}
+#endif
+
 /*---------------------------------------------------------------------------
 |   type domReadInfo
 |
 \--------------------------------------------------------------------------*/
 typedef struct _domReadInfo {
@@ -146,20 +287,86 @@
     domActiveNS      *activeNS;
     int               baseURIstackSize;
     int               baseURIstackPos;
     domActiveBaseURI *baseURIstack;
     int               insideDTD;
+#ifndef TDOM_NO_DTD_VALIDATION
+    int               dtdvalidation;
+#endif
     int               status;
-
+#ifndef TDOM_NO_DTD_VALIDATION
+    /* DTD validation related struct members follow */
+    char *doctypeName;                /* From DOCTYPE declaration */
+    int  skipWhiteCDATAs;             /* Flag: white space allowed in
+                                         current content model? */
+    int  ignorePCDATA;                /* Flag: currently mixed content
+                                         model? */
+    Tcl_HashTable *tagNames;          /* Hash table of all ELEMENT
+                                         declarations of the DTD.
+                                         Element name is the key.
+                                         While parsing, entry points
+                                         to the XML_Content of that
+                                         Element, after finishing of
+                                         DTD parsing, entry holds a
+                                         pointer to the TNC_Content
+                                         of that element. */
+    TNC_ElemAttInfo *elemAttInfo;     /* TncElementStartCommand stores
+                                         the elemAttInfo pointer of
+                                         the current element here for
+                                         DOM validation, to avoid two
+                                         element name lookups. */
+    int  elemContentsRewriten;        /* Signals, if the tagNames
+                                         entries point to
+                                         TNC_Contents */
+    int  dtdstatus;                   /* While used with expat obj:
+                                         1 after successful parsed
+                                         DTD, 0 otherwise.
+                                         For validateCmd used for
+                                         error report during
+                                         validation: 0 OK, 1 validation
+                                         error. */
+    int  idCheck;                     /* Flag: check IDREF resolution*/
+    Tcl_HashTable *attDefsTables;     /* Used to store ATTLIST
+                                         declarations while parsing.
+                                         Keys are the element names. */
+    Tcl_HashTable *entityDecls;       /* Used to store ENTITY
+                                         declarations. */
+    Tcl_HashTable *notationDecls;     /* Used to store NOTATION
+                                         declarations.
*/
+    Tcl_HashTable *ids;               /* Used to track IDs */
+    int  contentStackSize;            /* Current size of the content
+                                         stack */
+    int  contentStackPtr;             /* Points to the currently active
+                                         content model on the stack */
+    TNC_ContentStack *contentStack;   /* Stack for the currently
+                                         nested open content models. */
+#endif
 } domReadInfo;
 /*----------------------------------------------------------------------------
 |   Prototypes
 |
 \---------------------------------------------------------------------------*/
 static void DispatchPCDATA (domReadInfo *info);
+#ifndef TDOM_NO_DTD_VALIDATION
+#define CHECK_UTF_CHARLEN(d) if (!(d)) { \
+        signalNotValid (userData, TNC_ERROR_ONLY_THREE_BYTE_UTF8);\
+        return;\
+    }
+/* Same check for functions returning int; expects userData in scope. */
+#define CHECK_UTF_CHARLENR(d) if (!(d)) { \
+        signalNotValid (userData, TNC_ERROR_ONLY_THREE_BYTE_UTF8);\
+        return 0;\
+    }
+/* Same check, additionally frees the local buffer named copy. */
+#define CHECK_UTF_CHARLEN_COPY(d) if (!(d)) { \
+        signalNotValid (userData, TNC_ERROR_ONLY_THREE_BYTE_UTF8);\
+        FREE (copy);\
+        return;\
+    }
+#endif
 #ifndef TCL_THREADS
 /*---------------------------------------------------------------------------
 |   domModuleFinalize
@@ -229,10 +436,32 @@
 domProcessingInstructionNode *
 coerceToProcessingInstructionNode( domNode *n )
 {
     return (domProcessingInstructionNode *)n;
 }
+
+#ifndef TDOM_NO_DTD_VALIDATION
+/* Flags the parse as invalid: sets status to TCL_ERROR, stores "At line
+   L, column C: <message for code>" as interp result, stops the parser. */
+static void
+signalNotValid (void *userData, int code)
+{
+    domReadInfo *tncdata = (domReadInfo *) userData;
+    char linenr[50], colnr[50];
+    tncdata->status = TCL_ERROR;
+    sprintf(linenr, "%lu",   /* XML_Size is unsigned; cast to match %lu */
+            (unsigned long) XML_GetCurrentLineNumber(tncdata->parser));
+    sprintf(colnr, "%lu",
+            (unsigned long) XML_GetCurrentColumnNumber(tncdata->parser));
+    Tcl_ResetResult (tncdata->interp);
+    Tcl_AppendResult (tncdata->interp, "At line ", linenr, ", column ",
+                      colnr, ": ", (char *)TNC_ErrorString (code),
+                      NULL);
+    XML_StopParser(tncdata->parser, 1);   /* 1: resumable stop */
+}
+#endif
+
 /*---------------------------------------------------------------------------
 |   domIsNAME
 |
 \--------------------------------------------------------------------------*/
 int
@@ -1113,10 +1342,935 @@
     return NULL;
 }
 #ifndef TDOM_NO_EXPAT
+#ifndef TDOM_NO_DTD_VALIDATION
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncRewriteModel --
+ *
+ *      This helper procedure recursively creates a TNC_Content from
+ *      an XML_Content.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Fills *tmodel (except attInfo) and allocates the child arrays.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static void
+TncRewriteModel (
+    XML_Content   *emodel,
+    TNC_Content   *tmodel,
+    Tcl_HashTable *tagNames
+)
+{
+    Tcl_HashEntry *entryPtr;
+    unsigned int i;
+
+    tmodel->type = emodel->type;
+    tmodel->quant = emodel->quant;
+    tmodel->numchildren = emodel->numchildren;
+    tmodel->children = NULL;
+    tmodel->nameId = NULL;
+    switch (emodel->type) {
+    case XML_CTYPE_MIXED:
+        if (emodel->quant == XML_CQUANT_REP) {
+            tmodel->children = (TNC_Content *)
+                MALLOC (sizeof (TNC_Content) * emodel->numchildren);
+            for (i = 0; i < emodel->numchildren; i++) {
+                TncRewriteModel (&emodel->children[i], &tmodel->children[i],
+                                 tagNames);
+            }
+        }
+        break;
+    case XML_CTYPE_ANY:
+    case XML_CTYPE_EMPTY:
+        /* do nothing */
+        break;
+    case XML_CTYPE_SEQ:
+    case XML_CTYPE_CHOICE:
+        tmodel->children = (TNC_Content *)
+            MALLOC (sizeof (TNC_Content) * emodel->numchildren);
+        for (i = 0; i < emodel->numchildren; i++) {
+            TncRewriteModel (&emodel->children[i], &tmodel->children[i],
+                             tagNames);
+        }
+        break;
+    case XML_CTYPE_NAME:
+        entryPtr = Tcl_FindHashEntry (tagNames, emodel->name);
+        /* Notice, that it is possible for entryPtr to be NULL.
+           This means, a content model uses a not declared element.
+           This is legal even in valid documents. (Of course, if the
+           undeclared element actually shows up in the document
+           that would make the document invalid.) See rec 3.2
+
+           QUESTION: Should there be a flag to enable a warning,
+           when a declaration contains an element type for which
+           no declaration is provided, as rec 3.2 mentioned?
This would be the appropriate place to emit the
+           warning. */
+        tmodel->nameId = entryPtr;
+    }
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncEndDoctypeDeclHandler --
+ *
+ *      This procedure is called at the end of the DOCTYPE
+ *      declaration, after processing any external subset.
+ *      It rewrites the XML_Content models to TNC_Content
+ *      models and frees the XML_Content models.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Replaces the XML_Content values in tagNames by TNC_Content models,
+ *      sets elemContentsRewriten; on success sets dtdstatus to 1.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static void
+TncEndDoctypeDeclHandler (
+    void *userData
+)
+{
+    domReadInfo *tncdata = (domReadInfo *) userData;
+    Tcl_HashEntry *entryPtr, *ePtr1;
+    Tcl_HashSearch search;
+    XML_Content   *emodel;
+    TNC_Content   *tmodel = NULL;
+    char *elementName;
+
+    entryPtr = Tcl_FirstHashEntry (tncdata->tagNames, &search);
+    while (entryPtr != NULL) {
+#ifdef TNC_DEBUG
+        printf ("name: %-20s   nameId: %p\n",
+                Tcl_GetHashKey (tncdata->tagNames, entryPtr),
+                entryPtr);
+#endif
+        emodel = (XML_Content*) Tcl_GetHashValue (entryPtr);
+        tmodel = (TNC_Content*) MALLOC (sizeof (TNC_Content));
+        TncRewriteModel (emodel, tmodel, tncdata->tagNames);
+        elementName = Tcl_GetHashKey (tncdata->tagNames, entryPtr);
+        ePtr1 = Tcl_FindHashEntry (tncdata->attDefsTables, elementName);
+        if (ePtr1) {
+            tmodel->attInfo = (TNC_ElemAttInfo *) Tcl_GetHashValue (ePtr1);
+        } else {
+            tmodel->attInfo = NULL;
+        }
+        Tcl_SetHashValue (entryPtr, tmodel);
+        XML_MemFree (tncdata->parser, emodel);
+        entryPtr = Tcl_NextHashEntry (&search);
+    }
+    tncdata->elemContentsRewriten = 1;
+    /* Check that every used notation name is indeed declared */
+    entryPtr = Tcl_FirstHashEntry (tncdata->notationDecls, &search);
+    while (entryPtr != NULL) {
+#ifdef TNC_DEBUG
+        printf ("check notation name %s\n",
+                Tcl_GetHashKey (tncdata->notationDecls, entryPtr));
+        printf ("value %p\n", Tcl_GetHashValue
(entryPtr));
+#endif
+        if (!Tcl_GetHashValue (entryPtr)) {
+            signalNotValid (userData, TNC_ERROR_NOTATION_MUST_BE_DECLARED);
+            return;
+        }
+        entryPtr = Tcl_NextHashEntry (&search);
+    }
+    /* Checks, if every used entity name is indeed declared */
+    entryPtr = Tcl_FirstHashEntry (tncdata->entityDecls, &search);
+    while (entryPtr != NULL) {
+        if (!Tcl_GetHashValue (entryPtr)) {
+            signalNotValid (userData,
+                            TNC_ERROR_ATT_ENTITY_DEFAULT_MUST_BE_DECLARED);
+            return;
+        }
+        entryPtr = Tcl_NextHashEntry (&search);
+    }
+    tncdata->dtdstatus = 1;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncProbeElement --
+ *
+ *      This function checks, if the element matches the
+ *      topmost content model on the content stack.
+ *
+ * Results:
+ *      1 if the element matches,
+ *      0 if not.
+ *      -1 if not, but this isn't a validation error
+ *
+ * Side effects:
+ *      May push onto the contentStack (also in recursive calls);
+ *      growing the stack can move it, so stackelm is re-derived.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static int
+TncProbeElement (
+    TNC_NameId *nameId,
+    domReadInfo *tncdata
+)
+{
+    TNC_ContentStack *stackelm, *newelm;
+    TNC_Content *activeModel;
+    int myStackPtr, zeroMatchPossible, result;
+    unsigned int i, seqstartindex;
+
+#ifdef TNC_DEBUG
+    printf ("TncProbeElement start\n");
+    printContentStack (tncdata);
+#endif
+    myStackPtr = tncdata->contentStackPtr - 1;
+    stackelm = &(tncdata->contentStack)[myStackPtr];
+    switch (stackelm->model->type) {
+    case XML_CTYPE_MIXED:
+#ifdef TNC_DEBUG
+        printf ("TncProbeElement XML_CTYPE_MIXED\n");
+#endif
+        for (i = 0; i < stackelm->model->numchildren; i++) {
+            if ((&stackelm->model->children[i])->nameId == nameId) {
+                return 1;
+            }
+        }
+        return 0;
+    case XML_CTYPE_ANY:
+#ifdef TNC_DEBUG
+        printf ("TncProbeElement XML_CTYPE_ANY\n");
+#endif
+        return 1;
+    case XML_CTYPE_EMPTY:
+#ifdef TNC_DEBUG
+        printf ("TncProbeElement XML_CTYPE_EMPTY\n");
+#endif
+        return 0;
+    case XML_CTYPE_CHOICE:
+#ifdef TNC_DEBUG
+        printf ("TncProbeElement XML_CTYPE_CHOICE\n");
+#endif
+        if (stackelm->alreadymatched) {
+            activeModel = &stackelm->model->children[stackelm->activeChild];
+            if (activeModel->type == XML_CTYPE_NAME) {
+                /* so this stackelement must be the topmost */
+                if (activeModel->quant == XML_CQUANT_REP
+                    || activeModel->quant == XML_CQUANT_PLUS) {
+                    /* the last matched element is multiple, maybe it
+                       matches again */
+                    if (nameId == activeModel->nameId) {
+#ifdef TNC_DEBUG
+                        printf ("-->matched! child Nr. %d\n",
+                                stackelm->activeChild);
+#endif
+                        /* stack and activeChild nr. are already OK, just
+                           report success. */
+                        return 1;
+                    }
+                }
+            }
+            /* The active child is a SEQ or CHOICE. */
+            if (stackelm->model->quant == XML_CQUANT_NONE ||
+                stackelm->model->quant == XML_CQUANT_OPT) {
+                /*The child cp's type SEQ or CHOICE keep track by
+                  themselve about if they are repeated. Because we are
+                  here, they don't. Since the current cp has already
+                  matched and isn't multiple, the current cp as a whole
+                  is done. But no contradiction detected, so return
+                  "search further" */
+                return -1;
+            }
+        }
+
+        /* If one of the alternatives within the CHOICE cp is quant
+           REP or OPT, it isn't a contradition to the document structure,
+           if the cp doesn't match, even if it is quant
+           NONE or PLUS, because of the "zero time" match of this one
+           alternative. We use zeroMatchPossible, to know about this.*/
+        zeroMatchPossible = 0;
+        for (i = 0; i < stackelm->model->numchildren; i++) {
+            if ((&stackelm->model->children[i])->type == XML_CTYPE_NAME) {
+#ifdef TNC_DEBUG
+                printf ("child is type NAME\n");
+#endif
+                if ((&stackelm->model->children[i])->nameId == nameId) {
+#ifdef TNC_DEBUG
+                    printf ("-->matched! child Nr. %d\n",i);
+#endif
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    (&tncdata->contentStack[myStackPtr])->alreadymatched = 1;
+                    return 1;
+                }
+                else {
+                    /* If the name child is optional, we have a
+                       candidat for "zero match". */
+                    if ((&stackelm->model->children[i])->quant
+                        == XML_CQUANT_OPT ||
+                        (&stackelm->model->children[i])->quant
+                        == XML_CQUANT_REP) {
+#ifdef TNC_DEBUG
+                        printf ("zero match possible\n");
+#endif
+                        zeroMatchPossible = 1;
+                    }
+                }
+            }
+            else {
+#ifdef TNC_DEBUG
+                printf ("complex child type\n");
+#endif
+                if (tncdata->contentStackPtr == tncdata->contentStackSize) {
+                    /* Grow by whole stack elements (not pointer size) and
+                       re-derive stackelm, the stack may have moved. */
+                    tncdata->contentStackSize *= 2;
+                    tncdata->contentStack = (TNC_ContentStack *)
+                        Tcl_Realloc ((char *)tncdata->contentStack,
+                                     sizeof (TNC_ContentStack) *
+                                     tncdata->contentStackSize);
+                    stackelm = &tncdata->contentStack[myStackPtr];
+                }
+                newelm = &tncdata->contentStack[tncdata->contentStackPtr++];
+                newelm->model = &stackelm->model->children[i];
+                newelm->activeChild = 0;
+                newelm->deep = stackelm->deep + 1;
+                newelm->alreadymatched = 0;
+                result = TncProbeElement (nameId, tncdata);
+                stackelm = &tncdata->contentStack[myStackPtr]; /* may move */
+                if (result == 1) {
+#ifdef TNC_DEBUG
+                    printf ("-->matched! child nr. %d\n",i);
+#endif
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    (&tncdata->contentStack[myStackPtr])->alreadymatched = 1;
+                    return 1;
+                }
+                /* The child cp says, it doesn't has matched, but says
+                   also, it's perfectly OK, if it doesn't at all. So we
+                   have a candidat for "zero match". */
+                if (result == -1) {
+                    zeroMatchPossible = 1;
+                }
+                tncdata->contentStackPtr--;
+            }
+        }
+        /* OK, nobody has claimed a match. Question is: try further or is
+           this a document structure error. */
+        if (zeroMatchPossible ||
+            stackelm->alreadymatched ||
+            stackelm->model->quant == XML_CQUANT_REP ||
+            stackelm->model->quant == XML_CQUANT_OPT) {
+            return -1;
+        }
+#ifdef TNC_DEBUG
+        printf ("validation error\n");
+#endif
+        return 0;
+    case XML_CTYPE_SEQ:
+#ifdef TNC_DEBUG
+        printf ("TncProbeElement XML_CTYPE_SEQ\n");
+#endif
+        if (stackelm->alreadymatched) {
+            activeModel = &stackelm->model->children[stackelm->activeChild];
+            if (activeModel->type == XML_CTYPE_NAME) {
+                /* so this stackelement must be the topmost */
+                if (activeModel->quant == XML_CQUANT_REP
+                    || activeModel->quant == XML_CQUANT_PLUS) {
+                    /* the last matched element is multiple, maybe it
+                       matches again */
+                    if (nameId == activeModel->nameId) {
+#ifdef TNC_DEBUG
+                        printf ("-->matched! child Nr. %d\n",
+                                stackelm->activeChild);
+#endif
+                        /* stack and activeChild nr. are already OK, just
+                           report success. */
+                        return 1;
+                    }
+                }
+            }
+        }
+
+        if (stackelm->alreadymatched) {
+            seqstartindex = stackelm->activeChild + 1;
+        }
+        else {
+            seqstartindex = 0;
+        }
+        /* This time zeroMatchPossible flags, if every of the remaining
+           childs - that may every child, if !alreadymatched - doesn't
+           must occur.  We assume, the (outstanding childs of, in case
+           of alreadymatched) current stackelement model has only
+           optional childs, and set to wrong, if we find any
+           non-optional child */
+        zeroMatchPossible = 1;
+        for (i = seqstartindex; i < stackelm->model->numchildren; i++) {
+            if ((&stackelm->model->children[i])->type == XML_CTYPE_NAME) {
+                if ((&stackelm->model->children[i])->nameId == nameId) {
+#ifdef TNC_DEBUG
+                    printf ("-->matched! child Nr. %d\n",i);
+#endif
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    (&tncdata->contentStack[myStackPtr])->alreadymatched = 1;
+                    return 1;
+                } else if ((&stackelm->model->children[i])->quant
+                           == XML_CQUANT_NONE
+                           || (&stackelm->model->children[i])->quant
+                               == XML_CQUANT_PLUS) {
+                    zeroMatchPossible = 0;
+                    break;
+                }
+            } else {
+                if (tncdata->contentStackPtr == tncdata->contentStackSize) {
+                    /* Grow by whole stack elements (not pointer size) and
+                       re-derive stackelm, the stack may have moved. */
+                    tncdata->contentStackSize *= 2;
+                    tncdata->contentStack = (TNC_ContentStack *)
+                        Tcl_Realloc ((char *)tncdata->contentStack,
+                                     sizeof (TNC_ContentStack) *
+                                     tncdata->contentStackSize);
+                    stackelm = &tncdata->contentStack[myStackPtr];
+                }
+                newelm = &tncdata->contentStack[tncdata->contentStackPtr++];
+                newelm->model = &stackelm->model->children[i];
+                newelm->activeChild = 0;
+                newelm->deep = stackelm->deep + 1;
+                newelm->alreadymatched = 0;
+                result = TncProbeElement (nameId, tncdata);
+                stackelm = &tncdata->contentStack[myStackPtr]; /* may move */
+                if (result == 1) {
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    (&tncdata->contentStack[myStackPtr])->alreadymatched = 1;
+                    return 1;
+                }
+                tncdata->contentStackPtr--;
+                if (result == 0) {
+                    zeroMatchPossible = 0;
+                    break;
+                }
+            }
+        }
+        if (!stackelm->alreadymatched) {
+            if (zeroMatchPossible) {
+                /* The stackelm hasn't matched, but don't have to
+                   after all. Return try further */
+                return -1;
+            } else {
+                /* No previous match, but at least one child is
+                   necessary. Return depends of the quant of the
+                   entire seq */
+                if (stackelm->model->quant == XML_CQUANT_NONE ||
+                    stackelm->model->quant == XML_CQUANT_PLUS) {
+                    /* DTD claims, the seq as to be there, but isn't */
+                    return 0;
+                } else {
+                    /* The seq is optional */
+                    return -1;
+                }
+            }
+        }
+        if (stackelm->alreadymatched) {
+            if (!zeroMatchPossible) {
+                /* Some child at the start of the seq has matched in
+                   the past, but since zeroMatchPossible has changed
+                   to zero, there must be a non-matching non-optional
+                   child later. Error in document structure. */
+                return 0;
+            } else {
+                /* OK, SEQ has matched befor. But after the last match, there
+                   where no required (quant NONE or PLUS) childs. */
+                if (stackelm->model->quant == XML_CQUANT_NONE ||
+                    stackelm->model->quant == XML_CQUANT_OPT) {
+                    /* The entire seq isn't multiple. Just look further. */
+                    return -1;
+                }
+            }
+        }
+        /* The last untreated case is alreadymatched true,
+           zeroMatchPossible (of the rest of the seq childs after the
+           last match) true and the entire seq may be
+           multiple. Therefore start again with activeChild = 0, to
+           see, if the current nameId starts a repeated match of the
+           seq.  By the way: zeroMatchPossible still has initial value
+           1, therefor no second initialiation is needed */
+        for (i = 0; i < seqstartindex; i++) {
+            if ((&stackelm->model->children[i])->type == XML_CTYPE_NAME) {
+                if ((&stackelm->model->children[i])->nameId == nameId) {
+#ifdef TNC_DEBUG
+                    printf ("-->matched! child Nr. %d\n",i);
+#endif
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    (&tncdata->contentStack[myStackPtr])->alreadymatched = 1;
+                    return 1;
+                } else if ((&stackelm->model->children[i])->quant
+                           == XML_CQUANT_NONE
+                           || (&stackelm->model->children[i])->quant
+                               == XML_CQUANT_PLUS) {
+                    zeroMatchPossible = 0;
+                    break;
+                }
+            } else {
+                if (tncdata->contentStackPtr == tncdata->contentStackSize) {
+                    /* Grow by whole stack elements (not pointer size) and
+                       re-derive stackelm, the stack may have moved. */
+                    tncdata->contentStackSize *= 2;
+                    tncdata->contentStack = (TNC_ContentStack *)
+                        Tcl_Realloc ((char *)tncdata->contentStack,
+                                     sizeof (TNC_ContentStack) *
+                                     tncdata->contentStackSize);
+                    stackelm = &tncdata->contentStack[myStackPtr];
+                }
+                newelm = &tncdata->contentStack[tncdata->contentStackPtr++];
+                newelm->model = &stackelm->model->children[i];
+                newelm->activeChild = 0;
+                newelm->deep = stackelm->deep + 1;
+                newelm->alreadymatched = 0;
+                result = TncProbeElement (nameId, tncdata);
+                stackelm = &tncdata->contentStack[myStackPtr]; /* may move */
+                if (result) { /* NOTE(review): true for -1 too - intended? */
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    /* alreadymatched is already 1 */
+                    return 1;
+                }
+                tncdata->contentStackPtr--;
+                if (result == 0) {
+                    /* OK, the seq doesn't match again. But since it have
+                       already matched, this isn't return 0 but.. */
+                    return -1;
+                }
+            }
+        }
+        /* seq doesn't match again and every seq child from the very first
+           up to (not including) the last match aren't required. This last
+           fact may be nice to know, but after all since the entire seq have
+           matched already ... */
+        return -1;
+    case XML_CTYPE_NAME:
+        /* NAME type dosen't occur at top level of a content model and is
+           handled in some "shotcut" way directly in the CHOICE and SEQ cases.
+           It's only here to pacify gcc -Wall. */
+        printf ("error!!! - in TncProbeElement: XML_CTYPE_NAME shouldn't reached in any case.\n");
+    default:
+        printf ("error!!! - in TncProbeElement: unknown content type: %d\n",
+                stackelm->model->type);
+    }
+    /* not reached */
+    printf ("error!!! - in TncProbeElement: end of function reached.\n");
+    return 0;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncProbeAttribute --
+ *
+ *      This function checks, if the given attribute
+ *      and it's value are allowed for this element.
+ *
+ * Results:
+ *      1 if the attribute name/value is OK,
+ *      0 if not.
+ *
+ * Side effects:
+ *      Eventually increments the required attributes counter.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static int
+TncProbeAttribute (
+    void *userData,            /* the domReadInfo of the running parse */
+    Tcl_HashTable *elemAtts,   /* attribute declarations, keyed by name */
+    char *attrName,
+    char *attrValue,           /* IDREFS: split in place, then restored */
+    int *nrOfreq               /* incremented if the decl has isrequired */
+)
+{
+    domReadInfo *tncdata = (domReadInfo *) userData;
+    Tcl_HashEntry *entryPtr;
+    TNC_AttDecl *attDecl;
+    char *pc, *copy, save;
+    int clen, i, start, hnew;
+    TNC_EntityInfo *entityInfo;
+
+    entryPtr = Tcl_FindHashEntry (elemAtts, attrName);
+    if (!entryPtr) {
+        signalNotValid (userData, TNC_ERROR_UNKNOWN_ATTRIBUTE);
+        return 0;
+    }
+    /* NOTE: attribute uniqueness per element is a wellformed
+       constrain and therefor done by expat. */
+    attDecl = (TNC_AttDecl *) Tcl_GetHashValue (entryPtr);
+    switch (attDecl->att_type) {
+    case TNC_ATTTYPE_CDATA:
+        if (attDecl->isrequired && attDecl->dflt) {
+            if (strcmp (attDecl->dflt, attrValue) != 0) {
+                signalNotValid (userData,
+                                TNC_ERROR_WRONG_FIXED_ATTVALUE);
+                return 0;
+            }
+        }
+        break;
+
+    case TNC_ATTTYPE_ID:
+        pc = (char*)attrValue;
+        clen = UTF8_CHAR_LEN (*pc);
+        CHECK_UTF_CHARLENR (clen);
+        if (!UTF8_GET_NAME_START (pc, clen)) {
+            signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+            /* Stop here: otherwise the invalid value would still be
+               registered as ID and reported as OK. */
+            return 0;
+        }
+        pc += clen;
+        while (*pc != '\0') {
+            clen = UTF8_CHAR_LEN (*pc);
+            CHECK_UTF_CHARLENR (clen);
+            if (!UTF8_GET_NAMING_NMTOKEN (pc, clen)) {
+                signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+                return 0;
+            }
+            pc += clen;
+        }
+        entryPtr = Tcl_CreateHashEntry (tncdata->ids, attrValue, &hnew);
+        if (!hnew) {
+            if (Tcl_GetHashValue (entryPtr)) {
+                signalNotValid (userData,
+                                TNC_ERROR_DUPLICATE_ID_VALUE);
+                return 0;
+            }
+        }
+        Tcl_SetHashValue (entryPtr, (char *) 1);
+        break;
+
+    case TNC_ATTTYPE_IDREF:
+        /* Name type constraint "implicit" checked. If the
+           referenced ID exists, the type must be OK, because the
+           type of the ID's within the document are checked.
+           If there isn't such an ID, it's an error anyway. */
+        if (attrValue[0] == '\0') {
+            signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+            return 0;
+        }
+        entryPtr = Tcl_CreateHashEntry (tncdata->ids, attrValue, &hnew);
+        break;
+
+    case TNC_ATTTYPE_IDREFS:
+        if (attrValue[0] == '\0') {
+            signalNotValid (userData, TNC_ERROR_NAMES_REQUIRED);
+            return 0;
+        }
+        /* Due to attribute value normalization (xml rec 3.3.3) this
+           is a simple list "ref ref ref ..." without leading or
+           trailing spaces and exact one space between the refs. */
+        start = i = 0;
+        while (attrValue[i]) {
+            if (attrValue[i] == ' ') {
+                save = attrValue[i];
+                attrValue[i] = '\0';
+                entryPtr = Tcl_CreateHashEntry (tncdata->ids,
+                                                &attrValue[start], &hnew);
+                attrValue[i] = save;
+                start = ++i;
+                continue;
+            }
+            i++;
+        }
+        entryPtr = Tcl_CreateHashEntry (tncdata->ids, &attrValue[start],
+                                        &hnew);
+        break;
+
+    case TNC_ATTTYPE_ENTITY:
+        /* There is a validity constraint requesting entity attributes
+           values to be type Name. But if there would be an entity
+           declaration that doesn't fit this constraint, expat would
+           have already complained about the definition. So we go the
+           easy way and just look up the att value. If it's declared,
+           type must be OK, if not, it's an error anyway. */
+        entryPtr = Tcl_FindHashEntry (tncdata->entityDecls, attrValue);
+        if (!entryPtr) {
+            signalNotValid (userData, TNC_ERROR_ENTITY_ATTRIBUTE);
+            return 0;
+        }
+        entityInfo = (TNC_EntityInfo *) Tcl_GetHashValue (entryPtr);
+        if (!entityInfo->is_notation) {
+            signalNotValid (userData, TNC_ERROR_ENTITY_ATTRIBUTE);
+            return 0;
+        }
+        break;
+
+    case TNC_ATTTYPE_ENTITIES:
+        /* Normalized by expat; for type see comment to
+           TNC_ATTTYPE_ENTITY */
+        copy = tdomstrdup (attrValue);
+        start = i = 0;
+        while (1) {
+            if (copy[i] == '\0') {
+                entryPtr = Tcl_FindHashEntry (tncdata->entityDecls,
+                                              &copy[start]);
+                if (!entryPtr) {
+                    signalNotValid (userData, TNC_ERROR_ENTITIES_ATTRIBUTE);
+                    FREE (copy);
+                    return 0;
+                }
+                entityInfo = (TNC_EntityInfo *) Tcl_GetHashValue (entryPtr);
+                if (!entityInfo->is_notation) {
+                    signalNotValid (userData, TNC_ERROR_ENTITIES_ATTRIBUTE);
+                    FREE (copy);
+                    return 0;
+                }
+                FREE (copy);
+                break;
+            }
+            if (copy[i] == ' ') {
+                copy[i] = '\0';
+                entryPtr = Tcl_FindHashEntry (tncdata->entityDecls,
+                                              &copy[start]);
+                if (!entryPtr) {
+                    signalNotValid (userData, TNC_ERROR_ENTITIES_ATTRIBUTE);
+                    FREE (copy);
+                    return 0;
+                }
+                entityInfo = (TNC_EntityInfo *) Tcl_GetHashValue (entryPtr);
+                if (!entityInfo->is_notation) {
+                    signalNotValid (userData, TNC_ERROR_ENTITIES_ATTRIBUTE);
+                    FREE (copy);
+                    return 0;
+                }
+                start = ++i;
+                continue;
+            }
+            i++;
+        }
+        break;
+
+    case TNC_ATTTYPE_NMTOKEN:
+        /* We assume, that the UTF-8 representation of the value is
+           valid (no partial chars, minimum encoding). This makes
+           things a little more easy and faster. I guess (but
+           haven't deeply checked - QUESTION -), expat would have
+           already complained otherwise. */
+        pc = (char*)attrValue;
+        clen = 0;
+        while (*pc != '\0') {
+            clen = UTF8_CHAR_LEN (*pc);
+            CHECK_UTF_CHARLENR (clen);
+            if (!UTF8_GET_NAMING_NMTOKEN (pc, clen)) {
+                signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+                return 0;
+            }
+            pc += clen;
+        }
+        /* clen still 0: the value was empty, which is no NMTOKEN. */
+        if (!clen) {
+            signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+            return 0;
+        }
+        break;
+
+    case TNC_ATTTYPE_NMTOKENS:
+        pc = (char*)attrValue;
+        clen = 0;
+        while (*pc != '\0') {
+            /* NMTOKENS are normalized by expat, so this should
+               be secure. */
+            if (*pc == ' ') {
+                pc++;
+            }
+            clen = UTF8_CHAR_LEN (*pc);
+            CHECK_UTF_CHARLENR (clen);
+            if (!UTF8_GET_NAMING_NMTOKEN (pc, clen)) {
+                signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+                return 0;
+            }
+            pc += clen;
+        }
+        /* clen still 0: the value was empty, which is no NMTOKENS. */
+        if (!clen) {
+            signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+            return 0;
+        }
+        break;
+
+    case TNC_ATTTYPE_NOTATION:
+        entryPtr = Tcl_FindHashEntry (attDecl->lookupTable, attrValue);
+        if (!entryPtr) {
+            signalNotValid (userData, TNC_ERROR_NOTATION_REQUIRED);
+            return 0;
+        }
+        break;
+
+    case TNC_ATTTYPE_ENUMERATION:
+        if (!Tcl_FindHashEntry (attDecl->lookupTable, attrValue)) {
+            signalNotValid (userData, TNC_ERROR_ENUM_ATT_WRONG_VALUE);
+            return 0;
+        }
+        break;
+    }
+
+    if (attDecl->isrequired) {
+        (*nrOfreq)++;
+    }
+
+    return 1;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncProbeElementEnd --
+ *
+ *      This procedure checks, if the current content allows the
+ *      the element to end here.
+ *
+ * Results:
+ *      1 if element end is OK,
+ *      0 if not.
+ *
+ * Side effects:
+ *      Let the contentStackPtr point to the last current content
+ *      model before the element had started.
+ * + *---------------------------------------------------------------------------- + */ + +static int +TncProbeElementEnd ( + domReadInfo *tncdata +) +{ + TNC_ContentStack stackelm; + unsigned int i; + int zeroMatchPossible, seqstartindex; + + stackelm = tncdata->contentStack[tncdata->contentStackPtr - 1]; + switch (stackelm.model->type) { + case XML_CTYPE_MIXED: + case XML_CTYPE_ANY: + case XML_CTYPE_EMPTY: + return 1; + case XML_CTYPE_CHOICE: + if (stackelm.alreadymatched) { + return 1; + } + + if (stackelm.model->quant == XML_CQUANT_REP || + stackelm.model->quant == XML_CQUANT_OPT) { + return 1; + } + zeroMatchPossible = 0; + for (i = 0; i < stackelm.model->numchildren; i++) { + if ((&stackelm.model->children[i])->type == XML_CTYPE_NAME) { + if ((&stackelm.model->children[i])->quant == XML_CQUANT_OPT || + (&stackelm.model->children[i])->quant == XML_CQUANT_REP) { + zeroMatchPossible = 1; + break; + } + } + else { + if (tncdata->contentStackPtr == tncdata->contentStackSize) { + tncdata->contentStack = (TNC_ContentStack *) + Tcl_Realloc ((char *)tncdata->contentStack, + sizeof (TNC_Content *) * 2 * + tncdata->contentStackSize); + tncdata->contentStackSize *= 2; + } + (&tncdata->contentStack[tncdata->contentStackPtr])->model + = &stackelm.model->children[i]; + tncdata->contentStack[tncdata->contentStackPtr].activeChild + = 0; + tncdata->contentStack[tncdata->contentStackPtr].deep + = stackelm.deep + 1; + tncdata->contentStack[tncdata->contentStackPtr].alreadymatched + = 0; + tncdata->contentStackPtr++; + if (TncProbeElementEnd (tncdata)) { + zeroMatchPossible = 1; + tncdata->contentStackPtr--; + break; + } + tncdata->contentStackPtr--; + } + } + if (zeroMatchPossible) { + return 1; + } else { + return 0; + } + case XML_CTYPE_SEQ: + if (!stackelm.alreadymatched) { + if (stackelm.model->quant == XML_CQUANT_REP || + stackelm.model->quant == XML_CQUANT_OPT) { + return 1; + } + } + if (!stackelm.alreadymatched) { + seqstartindex = 0; + } + else { + seqstartindex = 
stackelm.activeChild + 1; + } + for (i = seqstartindex; i < stackelm.model->numchildren; i++) { + if ((&stackelm.model->children[i])->type == XML_CTYPE_NAME) { + if ((&stackelm.model->children[i])->quant == XML_CQUANT_OPT || + (&stackelm.model->children[i])->quant == XML_CQUANT_REP) { + continue; + } else { + return 0; + } + } else { + if (tncdata->contentStackPtr == tncdata->contentStackSize) { + tncdata->contentStack = (TNC_ContentStack *) + Tcl_Realloc ((char *)tncdata->contentStack, + sizeof (TNC_Content *) * 2 * + tncdata->contentStackSize); + tncdata->contentStackSize *= 2; + } + (&tncdata->contentStack[tncdata->contentStackPtr])->model + = &stackelm.model->children[i]; + tncdata->contentStack[tncdata->contentStackPtr].activeChild + = 0; + tncdata->contentStack[tncdata->contentStackPtr].deep + = stackelm.deep + 1; + tncdata->contentStack[tncdata->contentStackPtr].alreadymatched + = 0; + tncdata->contentStackPtr++; + if (TncProbeElementEnd (tncdata)) { + tncdata->contentStackPtr--; + continue; + } + else { + tncdata->contentStackPtr--; + return 0; + } + } + } + return 1; + case XML_CTYPE_NAME: + /* NAME type dosen't occur at top level of a content model and is + handled in some "shotcut" way directly in the CHOICE and SEQ cases. + It's only here to pacify gcc -Wall. */ + fprintf (stderr, "error!!! - in TncProbeElementEnd: XML_CTYPE_NAME " + "shouldn't be reached in any case.\n"); + default: + fprintf (stderr, "error!!! 
- in TncProbeElementEnd: unknown content " + "type: %d\n", stackelm.model->type); + return 1; + } +} +#endif /*--------------------------------------------------------------------------- | startElement | \--------------------------------------------------------------------------*/ @@ -1163,10 +2317,149 @@ Tcl_ResetResult (info->interp); } } DispatchPCDATA (info); + +#ifndef TDOM_NO_DTD_VALIDATION + if (info->dtdvalidation) { + domReadInfo *tncdata = info; + Tcl_HashEntry *entryPtr; + Tcl_HashTable *elemAtts; + const char **atPtr; + TNC_ElemAttInfo *elemAttInfo; + TNC_Content *model; + int result, nrOfreq, acceptNoDoctype = 0; + +#ifdef TNC_DEBUG + printf ("TncElementStartCommand name: %s\n", name); +#endif + + /* If the document doesn't have a doctype declaration, but the + user have used the -useForeignDTD 1 feature, the collected + data out of the provided DTD isn't postprocessed by + TncElementStartCommand. We do this now. + NOTE: Since there wasn't a doctype declaration, there is no + information available which element is expected to be the + document element. Eventually it would be desirable, to set + this somehow. For now, this means, that every valid subtree + of the given DTD information is accepted. 
*/ + if (!tncdata->contentStackPtr && !tncdata->elemContentsRewriten) { + TncEndDoctypeDeclHandler (userData); + acceptNoDoctype = 1; + } + + entryPtr = Tcl_FindHashEntry (tncdata->tagNames, name); + if (!entryPtr) { + signalNotValid (userData, TNC_ERROR_UNKNOWN_ELEMENT); + return; + } + model = (TNC_Content *) Tcl_GetHashValue (entryPtr); + + switch (model->type) { + case XML_CTYPE_MIXED: + case XML_CTYPE_ANY: + tncdata->skipWhiteCDATAs = 1; + tncdata->ignorePCDATA = 1; + break; + case XML_CTYPE_EMPTY: + tncdata->skipWhiteCDATAs = 0; + break; + case XML_CTYPE_CHOICE: + case XML_CTYPE_SEQ: + tncdata->skipWhiteCDATAs = 1; + tncdata->ignorePCDATA = 0; + break; + case XML_CTYPE_NAME: + break; + } + + if (tncdata->contentStackPtr) { + /* This is the normal case, within some content, + at least the root element content. */ + while (1) { + result = TncProbeElement (entryPtr, tncdata); + if (result == -1) { + if (tncdata->contentStack[tncdata->contentStackPtr - 1].deep + == 0) { + signalNotValid (userData, + TNC_ERROR_ELEMENT_NOT_ALLOWED_HERE); + return; + } + tncdata->contentStackPtr--; + continue; + } + if (result) { + break; + } + if (!result) { + signalNotValid (userData, TNC_ERROR_ELEMENT_NOT_ALLOWED_HERE); + return; + } + } + if (tncdata->contentStackPtr == tncdata->contentStackSize) { + tncdata->contentStackSize *= 2; + tncdata->contentStack = (TNC_ContentStack *) + Tcl_Realloc ((char *)tncdata->contentStack, + sizeof (TNC_Content *)*tncdata->contentStackSize); + } + (&tncdata->contentStack[tncdata->contentStackPtr])->model = model; + (&tncdata->contentStack[tncdata->contentStackPtr])->activeChild = 0; + (&tncdata->contentStack[tncdata->contentStackPtr])->deep = 0; + (&tncdata->contentStack[tncdata->contentStackPtr])->alreadymatched = 0; + tncdata->contentStackPtr++; + } else { + /* This is only in case of the root element */ + if (atts) { + if (!tncdata->doctypeName) { + if (!acceptNoDoctype) { + signalNotValid (userData, TNC_ERROR_NO_DOCTYPE_DECL); + return; + } 
+ } else { + if (strcmp (tncdata->doctypeName, name) != 0) { + signalNotValid (userData, TNC_ERROR_WRONG_ROOT_ELEMENT); + return; + } + } + } + (&(tncdata->contentStack)[0])->model = model; + (&(tncdata->contentStack)[0])->activeChild = 0; + (&(tncdata->contentStack)[0])->deep = 0; + (&(tncdata->contentStack)[0])->alreadymatched = 0; + tncdata->contentStackPtr++; + } + + if (atts) { + elemAttInfo = model->attInfo; + if (!elemAttInfo) { + if (atts[0] != NULL) { + signalNotValid (userData, TNC_ERROR_NO_ATTRIBUTES); + return; + } + } else { + elemAtts = elemAttInfo->attributes; + nrOfreq = 0; + for (atPtr = atts; atPtr[0]; atPtr += 2) { + if (!TncProbeAttribute (userData, elemAtts, (char *) atPtr[0], + (char *) atPtr[1], &nrOfreq)) + return; + } + if (nrOfreq != elemAttInfo->nrOfreq) { + signalNotValid (userData, + TNC_ERROR_MISSING_REQUIRED_ATTRIBUTE); + return; + } + } + } else { + tncdata->elemAttInfo = model->attInfo; + } +#ifdef TNC_DEBUG + printf ("TncElementStartCommand end\n"); +#endif + } +#endif h = Tcl_CreateHashEntry(&HASHTAB(info->document,tdom_tagNames), name, &hnew); if (info->storeLineColumn) { node = (domNode*) domAlloc(sizeof(domNode) @@ -1442,10 +2735,80 @@ ) { domReadInfo *info = userData; DispatchPCDATA (info); + +#ifndef TDOM_NO_DTD_VALIDATION + if (info->dtdvalidation) { + domReadInfo *tncdata = info; + Tcl_HashEntry *entryPtr; + Tcl_HashSearch search; + +#ifdef TNC_DEBUG + printf ("TncElementEndCommand start\n"); + printContentStack (tncdata); +#endif + /* Calling XML_StopParser() in an element start event doesn't + * always prevent expat from calling the element end handler. + * (Yes, it's an expat thing, raised a few bugs upstream.) 
*/ + if (tncdata->status == TCL_ERROR) return; + while (1) { + if (!TncProbeElementEnd (tncdata)) { + signalNotValid (userData, TNC_ERROR_ELEMENT_CAN_NOT_END_HERE); + return; + } + if (tncdata->contentStack[tncdata->contentStackPtr - 1].deep == 0) { + break; + } + tncdata->contentStackPtr--; + } + /* Remove the content model of the closed element from the stack */ + tncdata->contentStackPtr--; +#ifdef TNC_DEBUG + printf ("after removing ended element from the stack\n"); + printContentStack (tncdata); +#endif + if (tncdata->contentStackPtr) { + switch ((&tncdata->contentStack[tncdata->contentStackPtr - 1])->model->type) { + case XML_CTYPE_MIXED: + case XML_CTYPE_ANY: + tncdata->skipWhiteCDATAs = 1; + tncdata->ignorePCDATA = 1; + break; + case XML_CTYPE_EMPTY: + tncdata->skipWhiteCDATAs = 0; + break; + case XML_CTYPE_CHOICE: + case XML_CTYPE_SEQ: + case XML_CTYPE_NAME: + tncdata->skipWhiteCDATAs = 1; + tncdata->ignorePCDATA = 0; + break; + } + } else { + /* This means, the root element is closed, + therefor the place to check, if every IDREF points + to a ID. 
*/ + if (tncdata->idCheck) { + for (entryPtr = Tcl_FirstHashEntry (tncdata->ids, &search); + entryPtr != NULL; + entryPtr = Tcl_NextHashEntry (&search)) { +#ifdef TNC_DEBUG + printf ("check id value %s\n", + Tcl_GetHashKey (tncdata->ids, entryPtr)); + printf ("value %p\n", Tcl_GetHashValue (entryPtr)); +#endif + if (!Tcl_GetHashValue (entryPtr)) { + signalNotValid (userData, TNC_ERROR_UNKNOWN_ID_REFERRED); + return; + } + } + } + } + } +#endif info->depth--; if (!info->ignorexmlns) { /* pop active namespaces */ while ( (info->activeNSpos >= 0) && @@ -1473,17 +2836,41 @@ | \--------------------------------------------------------------------------*/ static void characterDataHandler ( void *userData, - const char *s, + const char *data, int len ) { domReadInfo *info = userData; - Tcl_DStringAppend (info->cdata, s, len); + Tcl_DStringAppend (info->cdata, data, len); +#ifndef TDOM_NO_DTD_VALIDATION + if (info->dtdvalidation) { + domReadInfo *tncdata = info; + int i; + char *pc; + + if (!tncdata->skipWhiteCDATAs && len > 0) { + signalNotValid (userData, TNC_ERROR_EMPTY_ELEMENT); + return; + } + if (!tncdata->ignorePCDATA) { + for (i = 0, pc = (char*)data; i < len; i++, pc++) { + if ( (*pc == ' ') || + (*pc == '\n') || + (*pc == '\r') || + (*pc == '\t') ) { + continue; + } + signalNotValid (userData, TNC_ERROR_DISALLOWED_PCDATA); + return; + } + } + } +#endif return; } /*--------------------------------------------------------------------------- @@ -2120,10 +3507,583 @@ { domReadInfo *info = (domReadInfo *) userData; info->insideDTD = 0; } + +#ifndef TDOM_NO_DTD_VALIDATION +/* + *---------------------------------------------------------------------------- + * + * TncElementDeclCommand -- + * + * This procedure is called for every element declaration. + * + * Results: + * None. + * + * Side effects: + * Stores the tag name of the element in a lookup table. 
+ * + *---------------------------------------------------------------------------- + */ + +static void +TncElementDeclCommand ( + void *userData, + const char *name, + XML_Content *model +) +{ + domReadInfo *tncdata = (domReadInfo *) userData; + Tcl_HashEntry *entryPtr; + int newPtr; + unsigned int i, j; + + entryPtr = Tcl_CreateHashEntry (tncdata->tagNames, name, &newPtr); + /* "No element type may be declared more than once." (rec. 3.2) */ + if (!newPtr) { + XML_FreeContentModel (tncdata->parser, model); + signalNotValid (userData, TNC_ERROR_DUPLICATE_ELEMENT_DECL); + return; + } + /* "The same name must not appear more than once in a + single mixed-content declaration." (rec. 3.2.2) + NOTE: OK, OK, doing it this way may not be optimal or even fast + in some cases. Please step in with a more fancy solution, if you + feel the need. */ + if (model->type == XML_CTYPE_MIXED && model->quant == XML_CQUANT_REP) { + for (i = 0; i < model->numchildren; i++) { + for (j = i + 1; j < model->numchildren; j++) { + if (strcmp ((&model->children[i])->name, + (&model->children[j])->name) == 0) { + XML_FreeContentModel (tncdata->parser, model); + signalNotValid (userData, + TNC_ERROR_DUPLICATE_MIXED_ELEMENT); + return; + } + } + } + } + Tcl_SetHashValue (entryPtr, model); + return; +} + +/* + *---------------------------------------------------------------------------- + * + * TncAttDeclCommand -- + * + * This procedure is called for *each* attribute in an XML + * ATTLIST declaration. It stores the attribute definition in + * an element specific hash table. + * + * Results: + * None. + * + * Side effects: + * Stores the tag name of the element in a lookup table. 
+ * + *---------------------------------------------------------------------------- + */ + +static void +TncAttDeclCommand ( + void *userData, + const char *elname, + const char *attname, + const char *att_type, + const char *dflt, + int isrequired +) +{ + domReadInfo *tncdata = (domReadInfo *) userData; + Tcl_HashEntry *entryPtr, *entryPtr1; + Tcl_HashTable *elemAtts; + TNC_ElemAttInfo *elemAttInfo; + TNC_AttDecl *attDecl; + TNC_EntityInfo *entityInfo; + int newPtr, start, i, clen; + char *copy; + + entryPtr = Tcl_CreateHashEntry (tncdata->attDefsTables, elname, &newPtr); + if (newPtr) { + elemAttInfo = (TNC_ElemAttInfo *) MALLOC (sizeof (TNC_ElemAttInfo)); + elemAtts = (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable)); + Tcl_InitHashTable (elemAtts, TCL_STRING_KEYS); + elemAttInfo->attributes = elemAtts; + elemAttInfo->nrOfreq = 0; + elemAttInfo->nrOfIdAtts = 0; + Tcl_SetHashValue (entryPtr, elemAttInfo); + } else { + elemAttInfo = (TNC_ElemAttInfo *) Tcl_GetHashValue (entryPtr); + elemAtts = elemAttInfo->attributes; + } + entryPtr = Tcl_CreateHashEntry (elemAtts, attname, &newPtr); + /* Multiple Attribute declarations are allowed, but later declarations + are ignored. See rec 3.3. 
*/ + if (newPtr) { + attDecl = (TNC_AttDecl *) MALLOC (sizeof (TNC_AttDecl)); + if (strcmp (att_type, "CDATA") == 0) { + attDecl->att_type = TNC_ATTTYPE_CDATA; + } + else if (strcmp (att_type, "ID") == 0) { + if (elemAttInfo->nrOfIdAtts) { + signalNotValid (userData, TNC_ERROR_MORE_THAN_ONE_ID_ATT); + return; + } + elemAttInfo->nrOfIdAtts++; + if (dflt != NULL) { + signalNotValid (userData, TNC_ERROR_ID_ATT_DEFAULT); + return; + } + attDecl->att_type = TNC_ATTTYPE_ID; + } + else if (strcmp (att_type, "IDREF") == 0) { + attDecl->att_type = TNC_ATTTYPE_IDREF; + } + else if (strcmp (att_type, "IDREFS") == 0) { + attDecl->att_type = TNC_ATTTYPE_IDREFS; + } + else if (strcmp (att_type, "ENTITY") == 0) { + attDecl->att_type = TNC_ATTTYPE_ENTITY; + } + else if (strcmp (att_type, "ENTITIES") == 0) { + attDecl->att_type = TNC_ATTTYPE_ENTITIES; + } + else if (strcmp (att_type, "NMTOKEN") == 0) { + attDecl->att_type = TNC_ATTTYPE_NMTOKEN; + } + else if (strcmp (att_type, "NMTOKENS") == 0) { + attDecl->att_type = TNC_ATTTYPE_NMTOKENS; + } + else if (strncmp (att_type, "NOTATION(", 9) == 0) { + /* This is a bit puzzling. expat returns something like + + + as att_type "NOTATION(gif)". 
*/ + attDecl->att_type = TNC_ATTTYPE_NOTATION; + attDecl->lookupTable = + (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable)); + Tcl_InitHashTable (attDecl->lookupTable, TCL_STRING_KEYS); + copy = tdomstrdup (att_type); + start = i = 9; + while (i) { + if (copy[i] == ')') { + copy[i] = '\0'; +#ifdef TNC_DEBUG + printf ("att type NOTATION: notation %s allowed\n", + ©[start]); +#endif + Tcl_CreateHashEntry (attDecl->lookupTable, + ©[start], &newPtr); + entryPtr1 = Tcl_CreateHashEntry (tncdata->notationDecls, + ©[start], &newPtr); +#ifdef TNC_DEBUG + if (newPtr) { + printf ("up to now unknown NOTATION\n"); + } else { + printf ("NOTATION already known\n"); + } +#endif + FREE (copy); + break; + } + if (copy[i] == '|') { + copy[i] = '\0'; +#ifdef TNC_DEBUG + printf ("att type NOTATION: notation %s allowed\n", + ©[start]); +#endif + Tcl_CreateHashEntry (attDecl->lookupTable, + ©[start], &newPtr); + entryPtr1 = Tcl_CreateHashEntry (tncdata->notationDecls, + ©[start], &newPtr); +#ifdef TNC_DEBUG + if (newPtr) { + printf ("up to now unknown NOTATION\n"); + } else { + printf ("NOTATION already known\n"); + } +#endif + start = ++i; + continue; + } + clen = UTF8_CHAR_LEN (copy[i]); + CHECK_UTF_CHARLEN_COPY (clen); + if (!UTF8_GET_NAMING_NMTOKEN (©[i], clen)) { + signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED); + FREE (copy); + return; + } + i += clen; + } + } + else { + /* expat returns something like + + as att_type "(numbered|bullets)", e.g. in some + "non-official" normalized way. + Makes things easier for us. 
*/ + attDecl->att_type = TNC_ATTTYPE_ENUMERATION; + attDecl->lookupTable = + (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable)); + Tcl_InitHashTable (attDecl->lookupTable, TCL_STRING_KEYS); + copy = tdomstrdup (att_type); + start = i = 1; + while (1) { + if (copy[i] == ')') { + copy[i] = '\0'; + Tcl_CreateHashEntry (attDecl->lookupTable, + ©[start], &newPtr); + FREE (copy); + break; + } + if (copy[i] == '|') { + copy[i] = '\0'; + Tcl_CreateHashEntry (attDecl->lookupTable, + ©[start], &newPtr); + start = ++i; + continue; + } + clen = UTF8_CHAR_LEN (copy[i]); + CHECK_UTF_CHARLEN_COPY (clen); + if (!UTF8_GET_NAMING_NMTOKEN (©[i], clen)) { + signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED); + FREE (copy); + return; + } + i += clen; + } + } + if (dflt != NULL) { + switch (attDecl->att_type) { + case TNC_ATTTYPE_ENTITY: + case TNC_ATTTYPE_IDREF: + clen = UTF8_CHAR_LEN (*dflt); + CHECK_UTF_CHARLEN (clen); + if (!UTF8_GET_NAME_START (dflt, clen)) { + signalNotValid (userData, TNC_ERROR_NAME_REQUIRED); + return; + } + i = clen; + while (1) { + if (dflt[i] == '\0') { + break; + } + clen = UTF8_CHAR_LEN (dflt[i]); + CHECK_UTF_CHARLEN (clen); + if (!UTF8_GET_NAMING_NMTOKEN (&dflt[i], clen)) { + signalNotValid (userData, TNC_ERROR_NAME_REQUIRED); + return; + } + i += clen; + } + if (attDecl->att_type == TNC_ATTTYPE_ENTITY) { + entryPtr1 = Tcl_CreateHashEntry (tncdata->entityDecls, + dflt, &newPtr); + if (!newPtr) { + entityInfo = + (TNC_EntityInfo *) Tcl_GetHashValue (entryPtr1); + if (!entityInfo->is_notation) { + signalNotValid (userData,TNC_ERROR_ATT_ENTITY_DEFAULT_MUST_BE_DECLARED); + } + } + } + break; + case TNC_ATTTYPE_IDREFS: + start = i = 0; + while (1) { + if (dflt[i] == '\0') { + break; + } + if (dflt[i] == ' ') { + start = ++i; + } + if (start == i) { + clen = UTF8_CHAR_LEN (dflt[i]); + CHECK_UTF_CHARLEN (clen); + if (!UTF8_GET_NAME_START (&dflt[i], clen)) { + signalNotValid (userData, TNC_ERROR_NAME_REQUIRED); + return; + } + i += clen; + } + else { + clen = 
UTF8_CHAR_LEN (dflt[i]); + CHECK_UTF_CHARLEN (clen); + if (!UTF8_GET_NAMING_NMTOKEN (&dflt[i], clen)) { + signalNotValid (userData, TNC_ERROR_NAME_REQUIRED); + return; + } + i += clen; + } + } + break; + case TNC_ATTTYPE_ENTITIES: + copy = tdomstrdup (dflt); + start = i = 0; + while (1) { + if (copy[i] == '\0') { + FREE (copy); + break; + } + if (copy[i] == ' ') { + copy[i] = '\0'; + entryPtr1 = Tcl_CreateHashEntry (tncdata->entityDecls, + ©[start], + &newPtr); + if (!newPtr) { + entityInfo = + (TNC_EntityInfo *) Tcl_GetHashValue (entryPtr1); + if (!entityInfo->is_notation) { + signalNotValid (userData,TNC_ERROR_ATT_ENTITY_DEFAULT_MUST_BE_DECLARED); + } + } + start = ++i; + } + if (start == i) { + clen = UTF8_CHAR_LEN (copy[i]); + CHECK_UTF_CHARLEN_COPY (clen); + if (!UTF8_GET_NAME_START (©[i], clen)) { + signalNotValid (userData, TNC_ERROR_NAME_REQUIRED); + FREE (copy); + return; + } + i += clen; + } + else { + clen = UTF8_CHAR_LEN (copy[i]); + CHECK_UTF_CHARLEN_COPY (clen); + if (!UTF8_GET_NAMING_NMTOKEN (©[i], clen)) { + signalNotValid (userData, TNC_ERROR_NAME_REQUIRED); + FREE (copy); + return; + } + i += clen; + } + } + break; + case TNC_ATTTYPE_NMTOKEN: + i = 0; + while (1) { + if (dflt[i] == '\0') { + break; + } + clen = UTF8_CHAR_LEN (dflt[i]); + CHECK_UTF_CHARLEN (clen); + if (!UTF8_GET_NAMING_NMTOKEN (&dflt[i], clen)) { + signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED); + return; + } + i += clen; + } + if (!i) signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED); + break; + case TNC_ATTTYPE_NMTOKENS: + i = 0; + while (1) { + if (dflt[i] == '\0') { + break; + } + if (dflt[i] == ' ') { + i++; + } + clen = UTF8_CHAR_LEN (dflt[i]); + CHECK_UTF_CHARLEN (clen); + if (!UTF8_GET_NAMING_NMTOKEN (&dflt[i], clen)) { + signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED); + return; + } + i += clen; + } + if (!i) signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED); + break; + case TNC_ATTTYPE_NOTATION: + if (!Tcl_FindHashEntry (attDecl->lookupTable, dflt)) 
{ + signalNotValid (userData, TNC_ERROR_IMPOSSIBLE_DEFAULT); + return; + } + case TNC_ATTTYPE_ENUMERATION: + if (!Tcl_FindHashEntry (attDecl->lookupTable, dflt)) { + signalNotValid (userData, TNC_ERROR_IMPOSSIBLE_DEFAULT); + return; + } + case TNC_ATTTYPE_CDATA: + case TNC_ATTTYPE_ID: + /* This both cases are only there, to pacify -Wall. + CDATA may have any allowed characters (and + everything else is detected by extpat). ID's not + allowed to have defaults (handled above). */ + ; + } + attDecl->dflt = tdomstrdup (dflt); + } + else { + attDecl->dflt = NULL; + } + if (isrequired) { + elemAttInfo->nrOfreq++; + } + attDecl->isrequired = isrequired; + Tcl_SetHashValue (entryPtr, attDecl); + } +} + +/* + *---------------------------------------------------------------------------- + * + * TncNotationDeclHandler -- + * + * This procedure is called for every notation declaration. + * + * Results: + * None. + * + * Side effects: + * Stores the notationName in the notationDecls table with value + * one. + * + *---------------------------------------------------------------------------- + */ + +static void +TncNotationDeclHandler ( + void *userData, + const char *notationName, + const char *base, + const char *systemId, + const char *publicId +) +{ + domReadInfo *tncdata = (domReadInfo *) userData; + Tcl_HashEntry *entryPtr; + int newPtr; + + entryPtr = Tcl_CreateHashEntry (tncdata->notationDecls, + notationName, + &newPtr); +#ifdef TNC_DEBUG + printf ("Notation %s declared\n", notationName); +#endif + Tcl_SetHashValue (entryPtr, (char *) 1); +} + +/* + *---------------------------------------------------------------------------- + * + * TncFreeTncModel -- + * + * This helper procedure frees recursively TNC_Contents. + * + * Results: + * None. + * + * Side effects: + * Frees memory. 
+ * + *---------------------------------------------------------------------------- + */ + +static void +TncFreeTncModel ( + TNC_Content *tmodel +) +{ + unsigned int i; + + if (tmodel->children) { + for (i = 0; i < tmodel->numchildren; i++) { + TncFreeTncModel (&tmodel->children[i]); + } + FREE ((char *) tmodel->children); + } +} + +static void +TncFreeValidationData ( + domReadInfo *info + ) +{ + Tcl_HashEntry *entryPtr, *attentryPtr; + Tcl_HashSearch search, attsearch; + TNC_Content *model; + TNC_ElemAttInfo *elemAttInfo; + TNC_EntityInfo *entityInfo; + TNC_AttDecl *attDecl; + + if (!info->dtdvalidation) return; + + entryPtr = Tcl_FirstHashEntry (info->tagNames, &search); + while (entryPtr) { + model = Tcl_GetHashValue (entryPtr); + if (info->elemContentsRewriten) { + if (model) { + TncFreeTncModel (model); + FREE ((char *) model); + } + } else { + XML_FreeContentModel(info->parser, (XML_Content *)model); + } + entryPtr = Tcl_NextHashEntry (&search); + } + Tcl_DeleteHashTable (info->tagNames); + entryPtr = Tcl_FirstHashEntry (info->attDefsTables, &search); + while (entryPtr) { + elemAttInfo = Tcl_GetHashValue (entryPtr); + if (!elemAttInfo) { + entryPtr = Tcl_NextHashEntry (&search); + continue; + } + attentryPtr = Tcl_FirstHashEntry (elemAttInfo->attributes, &attsearch); + while (attentryPtr) { + attDecl = Tcl_GetHashValue (attentryPtr); + if (attDecl) { + if (attDecl->att_type == TNC_ATTTYPE_NOTATION || + attDecl->att_type == TNC_ATTTYPE_ENUMERATION) { + Tcl_DeleteHashTable (attDecl->lookupTable); + FREE ((char *) attDecl->lookupTable); + } + if (attDecl->dflt) { + FREE (attDecl->dflt); + } + FREE ((char *) attDecl); + } + attentryPtr = Tcl_NextHashEntry (&attsearch); + } + Tcl_DeleteHashTable (elemAttInfo->attributes); + FREE ((char *) elemAttInfo->attributes); + FREE ((char *) elemAttInfo); + entryPtr = Tcl_NextHashEntry (&search); + } + Tcl_DeleteHashTable (info->attDefsTables); + entryPtr = Tcl_FirstHashEntry (info->entityDecls, &search); + while (entryPtr) 
{ + entityInfo = Tcl_GetHashValue (entryPtr); + if (entityInfo) { + if (entityInfo->is_notation) { + FREE (entityInfo->notationName); + } + FREE ((char *) entityInfo); + } + entryPtr = Tcl_NextHashEntry (&search); + } + Tcl_DeleteHashTable (info->entityDecls); + Tcl_DeleteHashTable (info->notationDecls); + Tcl_DeleteHashTable (info->ids); + if (info->doctypeName) { + FREE (info->doctypeName); + } + FREE ((char *) info->tagNames); + FREE ((char *) info->attDefsTables); + FREE ((char *) info->entityDecls); + FREE ((char *) info->notationDecls); + FREE ((char *) info->ids); + FREE ((char *) info->contentStack); +} +#endif /*--------------------------------------------------------------------------- | domReadDocument | \--------------------------------------------------------------------------*/ @@ -2141,10 +4101,13 @@ Tcl_Channel channel, const char *baseurl, Tcl_Obj *extResolver, int useForeignDTD, int paramEntityParsing, +#ifndef TDOM_NO_DTD_VALIDATION + int dtdvalidation, +#endif Tcl_Interp *interp, int *resultcode ) { int done, tclLen; @@ -2187,11 +4150,39 @@ info.baseURIstackSize = INITIAL_BASEURISTACK_SIZE; info.baseURIstack = (domActiveBaseURI*) MALLOC (sizeof(domActiveBaseURI) * info.baseURIstackSize); info.insideDTD = 0; info.status = 0; - +#ifndef TDOM_NO_DTD_VALIDATION + info.dtdvalidation = dtdvalidation; + + if (dtdvalidation) { + info.tagNames = (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable)); + Tcl_InitHashTable (info.tagNames, TCL_STRING_KEYS); + info.elemContentsRewriten = 0; + info.dtdstatus = 0; + info.idCheck = 1; + info.attDefsTables = + (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable)); + Tcl_InitHashTable (info.attDefsTables, TCL_STRING_KEYS); + info.entityDecls = + (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable)); + Tcl_InitHashTable (info.entityDecls, TCL_STRING_KEYS); + info.notationDecls = + (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable)); + Tcl_InitHashTable (info.notationDecls, TCL_STRING_KEYS); + info.ids = (Tcl_HashTable *) MALLOC 
(sizeof (Tcl_HashTable)); + Tcl_InitHashTable (info.ids, TCL_STRING_KEYS); + info.doctypeName = NULL; + info.skipWhiteCDATAs = 1; + info.ignorePCDATA = 0; + info.contentStack = (TNC_ContentStack *) + MALLOC (sizeof (TNC_ContentStack) * TNC_INITCONTENTSTACKSIZE); + info.contentStackSize = TNC_INITCONTENTSTACKSIZE; + info.contentStackPtr = 0; + } +#endif XML_SetUserData(parser, &info); XML_SetBase (parser, baseurl); /* We must use XML_GetBase(), because XML_SetBase copies the baseURI, and we want to compare the pointers */ info.baseURIstack[0].baseURI = XML_GetBase (parser); @@ -2210,17 +4201,22 @@ XML_SetDoctypeDeclHandler (parser, startDoctypeDeclHandler, endDoctypeDeclHandler); if (keepCDATA) { XML_SetCdataSectionHandler(parser, startCDATA, endCDATA); } - - +#ifndef TDOM_NO_DTD_VALIDATION + if (dtdvalidation) { + XML_SetElementDeclHandler (parser, TncElementDeclCommand); + XML_SetAttlistDeclHandler (parser, TncAttDeclCommand); + XML_SetNotationDeclHandler (parser, TncNotationDeclHandler); + } +#endif if (channel == NULL) { status = XML_Parse(parser, xml, length, 1); switch (status) { case XML_STATUS_SUSPENDED: - DBG(fprintf(stderr, "XML_STATUS_SUSPENDED\n");) + DBG(fprintf(stderr, "XML_STATUS_SUSPENDED\n")); if (info.status == TCL_BREAK) { Tcl_ResetResult(interp); } /* fall throu */ case XML_STATUS_ERROR: @@ -2227,10 +4223,13 @@ DBG(fprintf(stderr, "XML_STATUS_ERROR\n");) FREE ( info.activeNS ); FREE ( info.baseURIstack ); Tcl_DStringFree (info.cdata); FREE ( info.cdata); +#ifndef TDOM_NO_DTD_VALIDATION + if (info.dtdvalidation) TncFreeValidationData (&info); +#endif domFreeDocument (doc, NULL, NULL); *resultcode = info.status; return NULL; case XML_STATUS_OK: break; @@ -2240,10 +4239,13 @@ if (Tcl_GetChannelOption (interp, channel, "-encoding", &dStr) != TCL_OK) { FREE ( (char*) info.activeNS ); FREE ( info.baseURIstack ); Tcl_DStringFree (info.cdata); FREE ( info.cdata); +#ifndef TDOM_NO_DTD_VALIDATION + if (info.dtdvalidation) TncFreeValidationData (&info); 
+#endif domFreeDocument (doc, NULL, NULL); *resultcode = info.status; return NULL; } if (strcmp (Tcl_DStringValue (&dStr), "utf-8")==0 ) useBinary = 1; @@ -2265,10 +4267,13 @@ DBG(fprintf(stderr, "XML_STATUS_ERROR\n");) FREE ( info.activeNS ); FREE ( info.baseURIstack ); Tcl_DStringFree (info.cdata); FREE ( info.cdata); +#ifndef TDOM_NO_DTD_VALIDATION + if (info.dtdvalidation) TncFreeValidationData (&info); +#endif domFreeDocument (doc, NULL, NULL); *resultcode = info.status; return NULL; case XML_STATUS_OK: break; @@ -2293,10 +4298,13 @@ DBG(fprintf(stderr, "XML_STATUS_ERROR\n");) FREE ( info.activeNS ); FREE ( info.baseURIstack ); Tcl_DStringFree (info.cdata); FREE ( info.cdata); +#ifndef TDOM_NO_DTD_VALIDATION + if (info.dtdvalidation) TncFreeValidationData (&info); +#endif domFreeDocument (doc, NULL, NULL); Tcl_DecrRefCount (bufObj); *resultcode = info.status; return NULL; case XML_STATUS_OK: @@ -2308,11 +4316,14 @@ } FREE ( info.activeNS ); FREE ( info.baseURIstack ); Tcl_DStringFree (info.cdata); FREE ( info.cdata); - +#ifndef TDOM_NO_DTD_VALIDATION + if (info.dtdvalidation) TncFreeValidationData (&info); +#endif + domSetDocumentElement (doc); return doc; } @@ -5169,10 +7180,13 @@ domActiveNS *activeNS; int baseURIstackSize; int baseURIstackPos; domActiveBaseURI *baseURIstack; int insideDTD; +#ifndef TDOM_NO_DTD_VALIDATION + int dtdvalidation; +#endif /* Now the tdom cmd specific elements */ int tdomStatus; Tcl_Obj *extResolver; } tdomCmdReadInfo; @@ -5241,10 +7255,13 @@ Tcl_DStringSetLength (info->cdata, 0); info->nextFeedbackPosition = info->feedbackAfter; info->interp = interp; info->activeNSpos = -1; info->insideDTD = 0; +#ifndef TDOM_NO_DTD_VALIDATION + info->dtdvalidation = 0; +#endif info->baseURIstackPos = 0; info->tdomStatus = 0; } @@ -5378,10 +7395,13 @@ info->baseURIstackPos = 0; info->baseURIstackSize = INITIAL_BASEURISTACK_SIZE; info->baseURIstack = (domActiveBaseURI*) MALLOC (sizeof(domActiveBaseURI) * info->baseURIstackSize); info->insideDTD = 
0; +#ifndef TDOM_NO_DTD_VALIDATION + info->dtdvalidation = 0; +#endif info->tdomStatus = 0; info->extResolver = NULL; handlerSet->userData = info; Index: generic/dom.h ================================================================== --- generic/dom.h +++ generic/dom.h @@ -725,10 +725,13 @@ Tcl_Channel channel, const char *baseurl, Tcl_Obj *extResolver, int useForeignDTD, int paramEntityParsing, +#ifndef TDOM_NO_DTD_VALIDATION + int dtdvalidation, +#endif Tcl_Interp *interp, int *status); void domFreeDocument (domDocument *doc, domFreeCallback freeCB, Index: generic/domxslt.c ================================================================== --- generic/domxslt.c +++ generic/domxslt.c @@ -5790,11 +5790,15 @@ } /* keep white space, no fiddling with the encoding (is this a good idea?) */ doc = domReadDocument (parser, xmlstring, len, 0, 0, storeLineColumn, 0, 0, NULL, chan, extbase, extResolver, 0, - (int) XML_PARAM_ENTITY_PARSING_ALWAYS, interp, + (int) XML_PARAM_ENTITY_PARSING_ALWAYS, +#ifndef TDOM_NO_DTD_VALIDATION + 0, +#endif + interp, &resultcode); if (xsltDoc->extResolver) { Tcl_DecrRefCount (extResolver); } if (doc == NULL) { Index: generic/tcldom.c ================================================================== --- generic/tcldom.c +++ generic/tcldom.c @@ -1223,10 +1223,13 @@ NULL, NULL, extResolver, 0, (int) XML_PARAM_ENTITY_PARSING_ALWAYS, +#ifndef TDOM_NO_DTD_VALIDATION + 0, +#endif interp, &resultcode); if (extResolver) { Tcl_DecrRefCount(extResolver); } @@ -6157,10 +6160,13 @@ int ignorexmlns = 0; int feedbackAfter = 0; int useForeignDTD = 0; int paramEntityParsing = (int)XML_PARAM_ENTITY_PARSING_ALWAYS; int keepCDATA = 0; +#ifndef TDOM_NO_DTD_VALIDATION + int dtdvalidation = 0; +#endif int status = 0; domDocument *doc; Tcl_Obj *newObjName = NULL; XML_Parser parser; Tcl_Channel chan = (Tcl_Channel) NULL; @@ -6173,11 +6179,15 @@ "-feedbackcmd", "-json", "-jsonroot", #ifdef TDOM_HAVE_GUMBO "-html5", #endif "-jsonmaxnesting", "-ignorexmlns", "--", - 
"-keepCDATA", NULL + "-keepCDATA", +#ifndef TDOM_NO_DTD_VALIDATION + "-dtdvalidation", +#endif + NULL }; enum parseOption { o_keepEmpties, o_simple, o_html, o_feedbackAfter, o_channel, o_baseurl, o_externalentitycommand, o_useForeignDTD, o_paramentityparsing, @@ -6185,10 +6195,13 @@ #ifdef TDOM_HAVE_GUMBO o_htmlfive, #endif o_jsonmaxnesting, o_ignorexmlns, o_LAST, o_keepCDATA +#ifndef TDOM_NO_DTD_VALIDATION + ,o_dtdvalidation +#endif }; static const char *paramEntityParsingValues[] = { "always", "never", @@ -6410,10 +6423,16 @@ objv++; objc--; break; case o_keepCDATA: keepCDATA = 1; objv++; objc--; break; + +#ifndef TDOM_NO_DTD_VALIDATION + case o_dtdvalidation: + dtdvalidation = 1; + objv++; objc--; break; +#endif } if ((enum parseOption) optionIndex == o_LAST) break; } @@ -6569,10 +6588,13 @@ chan, baseURI, extResolver, useForeignDTD, paramEntityParsing, +#ifndef TDOM_NO_DTD_VALIDATION + dtdvalidation, +#endif interp, &status); if (doc == NULL) { char s[50]; long byteIndex, i; @@ -6660,18 +6682,19 @@ static const char *features[] = { "expatversion", "expatmajorversion", "expatminorversion", "expatmicroversion", "dtd", "ns", "unknown", "tdomalloc", "lessns", "html5", "jsonmaxnesting", "versionhash", - "pullparser", "TCL_UTF_MAX", NULL + "pullparser", "TCL_UTF_MAX", "dtdvalidation", + NULL }; enum feature { o_expatversion, o_expatmajorversion, o_expatminorversion, o_expatmicroversion, o_dtd, o_ns, o_unknown, o_tdomalloc, o_lessns, o_html5, o_jsonmaxnesting, o_versionhash, - o_pullparser, o_TCL_UTF_MAX, + o_pullparser, o_TCL_UTF_MAX, o_dtdvalidation }; if (Tcl_GetIndexFromObj(interp, objv[1], features, "feature", 0, &featureIndex) != TCL_OK) { return TCL_ERROR; @@ -6754,10 +6777,18 @@ SetBooleanResult(result); break; case o_TCL_UTF_MAX: SetIntResult(TCL_UTF_MAX); break; + case o_dtdvalidation: +#ifndef TDOM_NO_DTD_VALIDATION + result = 1; +#else + result = 0; +#endif + SetBooleanResult(result); + break; } return TCL_OK; } 
/*---------------------------------------------------------------------------- Index: tdom.m4 ================================================================== --- tdom.m4 +++ tdom.m4 @@ -263,10 +263,51 @@ fi else AC_MSG_RESULT([no]) fi ]) + +#------------------------------------------------------------------------ +# TDOM_ENABLE_DTD_VALIDATION -- +# +# Building with validation features. +# +# Arguments: +# None +# +# Results: +# +# Adds the following arguments to configure: +# --enable-dtdvalidation=yes|no +# +# Defines the following vars: +# +# Sets the following vars: +# +#------------------------------------------------------------------------ + +AC_DEFUN(TDOM_ENABLE_DTD_VALIDATION, [ + AC_MSG_CHECKING([whether to enable dtd valiation]) + AC_ARG_ENABLE(dtdvalidation, + AC_HELP_STRING([--enable-dtdvalidation], + [build with valiation features (default: on)]), + [tcl_ok=$enableval], [tcl_ok=yes]) + + if test "${enable_dtdvalidation+set}" = set; then + enableval="$enable_dtdvalidation" + tcl_ok=$enableval + else + tcl_ok=yes + fi + + if test "$tcl_ok" = "no" ; then + AC_MSG_RESULT([no]) + AC_DEFINE(TDOM_NO_DTD_VALIDATION) + else + AC_MSG_RESULT([yes]) + fi +]) #------------------------------------------------------------------------ # TDOM_PATH_AOLSERVER # # Allows the building with support for AOLserver Index: tests/data/REC-xslt-19991116.xml ================================================================== --- tests/data/REC-xslt-19991116.xml +++ tests/data/REC-xslt-19991116.xml @@ -6352,15 +6352,11 @@

a way to group together consecutive nodes having duplicate subelements or attributes;

features to make handling of the HTML style attribute more convenient.

- -

features to make handling of the HTML style -attribute more convenient.

- Index: tests/dom.test ================================================================== --- tests/dom.test +++ tests/dom.test @@ -17,10 +17,12 @@ # dom-12.*: -feedbackAfter # # Copyright (c) 2002, 2003, 2004 Rolf Ade. source [file join [file dir [info script]] loadtdom.tcl] + +testConstraint dtdvalidation [dom featureinfo dtdvalidation] test dom-1.1 {createDocument with root node name not a XML Name} { list [catch {dom createDocument "root node"} msg] $msg } "1 {Invalid root element name 'root node'}" @@ -670,16 +672,66 @@ set result [$doc selectNodes count(doc/e/node())] $doc delete set result } 1 -test dom-2.42 {namespaces} { +proc dom-2.42-resolver {base systemId publicId} { + switch $publicId { + "-//W3C//DTD Specification V2.0//EN" { + set fd [open [file join [file dir [info script]] \ + data/xmlspec-v20.dtd]] + set xmlspec [read $fd] + close $fd + return [list "string" "" $xmlspec] + } + default { + puts stderr "Unexpected systemId '$systemId'" + return "" + } + } +} + +test dom-2.42 {-dtdvalidation} {dtdvalidation} { + set file [file join [file dir [info script]] data/REC-xslt-19991116.xml] + set fd [open $file] + set doc [dom parse -dtdvalidation \ + -channel $fd \ + -baseurl file://$file \ + -externalentitycommand dom-2.42-resolver] + $doc delete +} {} + +test dom-2.43 {-dtdvalidation / invalid dtd} {dtdvalidation} { + set result [catch { + dom parse -dtdvalidation {]> + }} errMsg] + set result +} 1 + +test dom-2.44 {namespaces} { set doc [dom parse {
notes
}] $doc delete } {} + +test dom-2.45 {-dtdvalidation / no dtd} {dtdvalidation} { + set result [catch { + #dom parse -dtdvalidation + dom parse -dtdvalidation + } errMsg] + set result +} 1 + +test dom-2.46 {Unknown empty element} {dtdvalidation} { + set result [catch { + dom parse -dtdvalidation {]> + }} errMsg] + set result +} 1 test dom-3.1 {isName} { dom isName ":foo" } {1}