Index: configure
==================================================================
--- configure
+++ configure
@@ -759,10 +759,11 @@
enable_dtd
enable_ns
enable_ucmd
enable_tdomalloc
enable_lessns
+enable_dtdvalidation
'
ac_precious_vars='build_alias
host_alias
target_alias
CC
@@ -1405,10 +1406,11 @@
--enable-ns build with XML namespace support (default: on)
--enable-unknown enable built-in unknown command (default: off)
--enable-tdomalloc build with the tDOM allocator (default: off)
--enable-lessns build with lower limit for XML ns declarations
(default: off)
+  --enable-dtdvalidation  build with validation features (default: on)
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
--with-tcl directory containing tcl configuration
@@ -8891,10 +8893,38 @@
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
fi
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable dtd validation" >&5
+$as_echo_n "checking whether to enable dtd validation... " >&6; }
+    # Check whether --enable-dtdvalidation was given.
+if test "${enable_dtdvalidation+set}" = set; then :
+  enableval=$enable_dtdvalidation; tcl_ok=$enableval
+else
+  tcl_ok=yes
+fi
+
+
+    if test "${enable_dtdvalidation+set}" = set; then
+        enableval="$enable_dtdvalidation"
+        tcl_ok=$enableval
+    else
+        tcl_ok=yes
+    fi
+
+    if test "$tcl_ok" = "no" ; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+        $as_echo "#define TDOM_NO_DTD_VALIDATION 1" >>confdefs.h
+
+    else
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+    fi
+
TDOMSHELL=tcldomsh
Index: configure.ac
==================================================================
--- configure.ac
+++ configure.ac
@@ -217,10 +217,11 @@
TDOM_ENABLE_DTD
TDOM_ENABLE_NS
TDOM_ENABLE_UNKNOWN
TDOM_ENABLE_TDOMALLOC
TDOM_ENABLE_LESS_NS
+TDOM_ENABLE_DTD_VALIDATION
TDOMSHELL=tcldomsh
AC_SUBST(TDOMSHELL)
TDOM_EXPORT_CONFIG
Index: doc/domNode.xml
==================================================================
--- doc/domNode.xml
+++ doc/domNode.xml
@@ -378,13 +378,14 @@
Similar to the way described above to inject literals in a secure
way into the XPath expression using tcl variable references there is a
syntax to inject element names from tcl variables. At every place
where the XPath syntax allows a node test there could be a tcl
variable reference (in any form), just the leading $ replaced with %.
-This allows one to select nodes with 'strange' (invalid, according to the
-appropriate XML production rule) node names which may be needed in
-case of working with JSON data.
+This allows one to select nodes with 'strange' (invalid, according to
+the appropriate XML production rule) node names which may be needed in
+case of working with JSON data or if the DOM tree was built with
+-ignorexmlns.
The option -namespaces expects a tcl list with prefix /
namespace pairs as argument. If this option is not given, then any
namespace prefix within the xpath expression will be first resolved
against the list of prefix / namespace pairs set with the
Index: extensions/tnc/tnc.c
==================================================================
--- extensions/tnc/tnc.c
+++ extensions/tnc/tnc.c
@@ -2061,11 +2061,10 @@
fprintf (stderr, "error!!! - in TncProbeElementEnd: unknown content "
"type: %d\n", stackelm.model->type);
return 1;
}
}
-
/*
*----------------------------------------------------------------------------
*
* TncElementEndCommand --
Index: generic/dom.c
==================================================================
--- generic/dom.c
+++ generic/dom.c
@@ -120,10 +120,151 @@
int depth;
const char *baseURI;
} domActiveBaseURI;
+
+#ifndef TDOM_NO_DTD_VALIDATION
+/* The elements of TNC_Content carry exactly the same information
+   as expat's XML_Content. But the element is identified by its
+   Tcl_HashEntry within the "tagNames" hash table (see domReadInfo)
+   and not by the element name. This should be much more efficient. */
+typedef struct TNC_cp TNC_Content;
+typedef struct TNC_elemAttInfo TNC_ElemAttInfo;
+
+struct TNC_cp
+{
+    enum XML_Content_Type type;     /* copied from the XML_Content */
+    enum XML_Content_Quant quant;   /* copied from the XML_Content */
+    Tcl_HashEntry *nameId;          /* tagNames entry for NAME particles; NULL otherwise or if undeclared */
+    unsigned int numchildren;       /* copied from the XML_Content */
+    TNC_Content *children;          /* array of rewritten child models, or NULL */
+    TNC_ElemAttInfo *attInfo;       /* set by TncEndDoctypeDeclHandler on an element's top-level model */
+};
+
+typedef struct TNC_contentStack
+{
+    TNC_Content *model;     /* content model this stack entry is matched against */
+    int activeChild;        /* index of the child of model that matched last */
+    int deep;               /* nesting level: deep of the pushing entry + 1 */
+    int alreadymatched;     /* flag: set to 1 once a child of model has matched */
+} TNC_ContentStack;
+
+typedef enum TNC_attType {      /* attribute type kept in TNC_AttDecl.att_type */
+    TNC_ATTTYPE_CDATA,
+    TNC_ATTTYPE_ID,
+    TNC_ATTTYPE_IDREF,
+    TNC_ATTTYPE_IDREFS,
+    TNC_ATTTYPE_ENTITY,
+    TNC_ATTTYPE_ENTITIES,
+    TNC_ATTTYPE_NMTOKEN,
+    TNC_ATTTYPE_NMTOKENS,
+    TNC_ATTTYPE_NOTATION,
+    TNC_ATTTYPE_ENUMERATION,
+} TNC_AttType;
+
+struct TNC_elemAttInfo         /* per-element attribute info, referenced by TNC_Content.attInfo */
+{
+    Tcl_HashTable *attributes; /* presumably attribute name -> TNC_AttDecl - TODO confirm */
+    int nrOfreq;               /* presumably the number of required attributes - TODO confirm */
+    int nrOfIdAtts;            /* presumably the number of ID typed attributes - TODO confirm */
+};
+
+typedef struct TNC_attDecl
+{
+    TNC_AttType att_type;       /* selects the value check done by TncProbeAttribute */
+    char *dflt;                 /* default value of the declaration; may be NULL */
+    int isrequired;             /* NOTE(review): with dflt set, CDATA values must equal dflt - confirm meaning */
+    Tcl_HashTable *lookupTable; /* either NotationTypes or enum values */
+} TNC_AttDecl;
+
+typedef struct TNC_entityInfo
+{
+    int is_notation;     /* must be non-zero for entities named in ENTITY/ENTITIES attribute values */
+    char *notationName;  /* presumably the notation the entity refers to - TODO confirm */
+} TNC_EntityInfo;
+
+typedef Tcl_HashEntry TNC_NameId;
+
+#define TNC_INITCONTENTSTACKSIZE 512
+
+enum TNC_Error {    /* values index the message table in TNC_ErrorString: keep both in the same order */
+    TNC_ERROR_NONE,
+    TNC_ERROR_DUPLICATE_ELEMENT_DECL,
+    TNC_ERROR_DUPLICATE_MIXED_ELEMENT,
+    TNC_ERROR_UNKNOWN_ELEMENT,
+    TNC_ERROR_EMPTY_ELEMENT,
+    TNC_ERROR_DISALLOWED_PCDATA,
+    TNC_ERROR_DISALLOWED_CDATA,
+    TNC_ERROR_NO_DOCTYPE_DECL,
+    TNC_ERROR_WRONG_ROOT_ELEMENT,
+    TNC_ERROR_NO_ATTRIBUTES,
+    TNC_ERROR_UNKNOWN_ATTRIBUTE,
+    TNC_ERROR_WRONG_FIXED_ATTVALUE,
+    TNC_ERROR_MISSING_REQUIRED_ATTRIBUTE,
+    TNC_ERROR_MORE_THAN_ONE_ID_ATT,
+    TNC_ERROR_ID_ATT_DEFAULT,
+    TNC_ERROR_DUPLICATE_ID_VALUE,
+    TNC_ERROR_UNKNOWN_ID_REFERRED,
+    TNC_ERROR_ENTITY_ATTRIBUTE,
+    TNC_ERROR_ENTITIES_ATTRIBUTE,
+    TNC_ERROR_ATT_ENTITY_DEFAULT_MUST_BE_DECLARED,
+    TNC_ERROR_NOTATION_REQUIRED,
+    TNC_ERROR_NOTATION_MUST_BE_DECLARED,
+    TNC_ERROR_IMPOSSIBLE_DEFAULT,
+    TNC_ERROR_ENUM_ATT_WRONG_VALUE,
+    TNC_ERROR_NMTOKEN_REQUIRED,
+    TNC_ERROR_NAME_REQUIRED,
+    TNC_ERROR_NAMES_REQUIRED,
+    TNC_ERROR_ELEMENT_NOT_ALLOWED_HERE,
+    TNC_ERROR_ELEMENT_CAN_NOT_END_HERE,
+    TNC_ERROR_ONLY_THREE_BYTE_UTF8,
+    TNC_ERROR_UNKNOWN_NODE_TYPE
+};
+
+const char *
+TNC_ErrorString (int code)      /* code: an enum TNC_Error value */
+{
+    static const char *message[] = {    /* one entry per enum TNC_Error value, same order */
+        "No error.",
+        "Element declared more than once.",
+        "The same name must not appear more than once in \n\tone mixed-content declaration.",
+        "No declaration for this element.",
+        "Element is declared to be empty, but isn't.",
+        "PCDATA not allowed here.",
+        "CDATA section not allowed here.",
+        "No DOCTYPE declaration.",
+        "Root element doesn't match DOCTYPE name.",
+        "No attributes defined for this element.",
+        "Unknown attribute for this element.",
+        "Attribute value must match the FIXED default.",
+        "Required attribute missing.",
+        "Only one attribute with type ID allowed.",
+        "No default value allowed for attribute type ID.",
+        "ID attribute values must be unique within the document.",
+        "Unknown ID referred.",
+        "Attribute value has to be a unparsed entity.",
+        "Attribute value has to be a sequence of unparsed entities.",
+        "The defaults of attributes with type ENTITY or ENTITIES\nhas to be unparsed entities.",
+        "Attribute value has to be one of the allowed notations.",
+        "Every used NOTATION must be declared.",
+        "Attribute default is not one of the allowed values",
+        "Attribute hasn't one of the allowed values.",
+        "Attribute value has to be a NMTOKEN.",
+        "Attribute value has to be a Name.",
+        "Attribute value has to match production Names.",
+        "Element is not allowed here.",
+        "Element can not end here (required element(s) missing).",
+        "Can only handle UTF8 chars up to 3 bytes length.",
+        "Unknown or unexpected dom node type."
+    };
+    /* Unknown codes yield NULL instead of reading past the table's end. */
+    int count = (int) (sizeof (message) / sizeof (message[0]));
+    return (code >= 0 && code < count) ? message[code] : 0;
+}
+#endif
+
/*---------------------------------------------------------------------------
| type domReadInfo
|
\--------------------------------------------------------------------------*/
typedef struct _domReadInfo {
@@ -146,20 +287,86 @@
domActiveNS *activeNS;
int baseURIstackSize;
int baseURIstackPos;
domActiveBaseURI *baseURIstack;
int insideDTD;
+#ifndef TDOM_NO_DTD_VALIDATION
+ int dtdvalidation;
+#endif
int status;
-
+#ifndef TDOM_NO_DTD_VALIDATION
+    /* DTD validation related struct members follow */
+ char *doctypeName; /* From DOCTYPE declaration */
+ int skipWhiteCDATAs; /* Flag: white space allowed in
+ current content model? */
+ int ignorePCDATA; /* Flag: currently mixed content
+ model? */
+ Tcl_HashTable *tagNames; /* Hash table of all ELEMENT
+ declarations of the DTD.
+ Element name is the key.
+ While parsing, entry points
+ to the XML_Content of that
+ Element, after finishing of
+ DTD parsing, entry holds a
+ pointer to the TNC_Content
+ of that element. */
+ TNC_ElemAttInfo *elemAttInfo; /* TncElementStartCommand stores
+ the elemAttInfo pointer of
+ the current element here for
+ DOM validation, to avoid two
+ element name lookups. */
+ int elemContentsRewriten; /* Signals, if the tagNames
+ entries point to
+ TNC_Contents */
+ int dtdstatus; /* While used with expat obj:
+ 1 after successful parsed
+ DTD, 0 otherwise.
+ For validateCmd used for
+ error report during
+ validation: 0 OK, 1 validation
+ error. */
+ int idCheck; /* Flag: check IDREF resolution*/
+ Tcl_HashTable *attDefsTables; /* Used to store ATTLIST
+ declarations while parsing.
+ Keys are the element names. */
+ Tcl_HashTable *entityDecls; /* Used to store ENTITY
+ declarations. */
+ Tcl_HashTable *notationDecls; /* Used to store NOTATION
+ declarations. */
+ Tcl_HashTable *ids; /* Used to track IDs */
+ int contentStackSize; /* Current size of the content
+ stack */
+ int contentStackPtr; /* Points to the currently active
+ content model on the stack */
+ TNC_ContentStack *contentStack; /* Stack for the currently
+ nested open content models. */
+#endif
} domReadInfo;
/*----------------------------------------------------------------------------
| Prototypes
|
\---------------------------------------------------------------------------*/
static void DispatchPCDATA (domReadInfo *info);
+#ifndef TDOM_NO_DTD_VALIDATION
+/* If (d) is zero: report TNC_ERROR_ONLY_THREE_BYTE_UTF8 and leave the calling
+   function. They use the caller's 'userData' (and 'copy' for _COPY). */
+#define CHECK_UTF_CHARLEN(d) do { if (!(d)) { \
+        signalNotValid (userData, TNC_ERROR_ONLY_THREE_BYTE_UTF8);\
+        return;\
+    } } while (0)
+#define CHECK_UTF_CHARLENR(d) do { if (!(d)) { \
+        signalNotValid (userData, TNC_ERROR_ONLY_THREE_BYTE_UTF8);\
+        return 0;\
+    } } while (0)
+#define CHECK_UTF_CHARLEN_COPY(d) do { if (!(d)) { \
+        signalNotValid (userData, TNC_ERROR_ONLY_THREE_BYTE_UTF8);\
+        FREE (copy);\
+        return;\
+    } } while (0)
+#endif
#ifndef TCL_THREADS
/*---------------------------------------------------------------------------
| domModuleFinalize
@@ -229,10 +436,32 @@
domProcessingInstructionNode * coerceToProcessingInstructionNode( domNode *n ) {
return (domProcessingInstructionNode *)n;
}
+
+#ifndef TDOM_NO_DTD_VALIDATION
+static void
+signalNotValid (void *userData, int code)
+{
+    /* Reports a validation error: sets status to TCL_ERROR, stores
+       "At line L, column C: <message for code>" as interp result
+       and stops the parser. userData must be the domReadInfo. */
+    domReadInfo *tncdata = (domReadInfo *) userData;
+    char linenr[50], colnr[50];
+
+    tncdata->status = TCL_ERROR;
+    /* XML_Size is an unsigned type; cast so that the format always matches. */
+    sprintf(linenr, "%lu", (unsigned long) XML_GetCurrentLineNumber(tncdata->parser));
+    sprintf(colnr, "%lu", (unsigned long) XML_GetCurrentColumnNumber(tncdata->parser));
+    Tcl_ResetResult (tncdata->interp);
+    Tcl_AppendResult (tncdata->interp, "At line ", linenr, ", column ",
+                      colnr, ": ", (char *)TNC_ErrorString (code), NULL);
+    XML_StopParser(tncdata->parser, 1);
+}
+#endif
+
/*---------------------------------------------------------------------------
| domIsNAME
|
\--------------------------------------------------------------------------*/
int
@@ -1113,10 +1342,935 @@
return NULL;
}
#ifndef TDOM_NO_EXPAT
+#ifndef TDOM_NO_DTD_VALIDATION
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncRewriteModel --
+ *
+ * This helper procedure creates recursively a TNC_Content from
+ * a XML_Content.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Allocates memory for the TNC_Content models.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static void
+TncRewriteModel (
+    XML_Content *emodel,
+    TNC_Content *tmodel,
+    Tcl_HashTable *tagNames
+)
+{
+    Tcl_HashEntry *entryPtr;
+    unsigned int i;
+
+    tmodel->type = emodel->type;
+    tmodel->quant = emodel->quant;
+    tmodel->numchildren = emodel->numchildren;
+    tmodel->children = NULL;
+    tmodel->nameId = NULL;
+    /* attInfo is assigned by TncEndDoctypeDeclHandler for top-level
+       models only; give nested particles a defined value as well. */
+    tmodel->attInfo = NULL;
+    switch (emodel->type) {
+    case XML_CTYPE_MIXED:
+        if (emodel->quant == XML_CQUANT_REP) {
+            tmodel->children = (TNC_Content *)
+                MALLOC (sizeof (TNC_Content) * emodel->numchildren);
+            for (i = 0; i < emodel->numchildren; i++) {
+                TncRewriteModel (&emodel->children[i], &tmodel->children[i],
+                                 tagNames);
+            }
+        }
+        break;
+    case XML_CTYPE_ANY:
+    case XML_CTYPE_EMPTY:
+        /* do nothing */
+        break;
+    case XML_CTYPE_SEQ:
+    case XML_CTYPE_CHOICE:
+        tmodel->children = (TNC_Content *)
+            MALLOC (sizeof (TNC_Content) * emodel->numchildren);
+        for (i = 0; i < emodel->numchildren; i++) {
+            TncRewriteModel (&emodel->children[i], &tmodel->children[i],
+                             tagNames);
+        }
+        break;
+    case XML_CTYPE_NAME:
+        entryPtr = Tcl_FindHashEntry (tagNames, emodel->name);
+        /* Notice that entryPtr may be NULL. This means the content
+           model uses an undeclared element, which is legal even in
+           valid documents (of course, if the undeclared element
+           actually shows up in the document, that makes the document
+           invalid). See rec 3.2.
+           QUESTION: Should there be a flag to enable a warning when a
+           declaration names an element type without declaration, as
+           rec 3.2 mentions? This would be the place to emit it. */
+        tmodel->nameId = entryPtr;
+    }
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncEndDoctypeDeclHandler --
+ *
+ * This procedure is called at the end of the DOCTYPE
+ * declaration, after processing any external subset.
+ * It rewrites the XML_Content models to TNC_Content
+ * models and frees the XML_Content models.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Rewrites the XML_Content models to TNC_Content
+ * models.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static void
+TncEndDoctypeDeclHandler (
+    void *userData
+)
+{
+    domReadInfo    *info = (domReadInfo *) userData;
+    Tcl_HashEntry  *h, *attDefs;
+    Tcl_HashSearch  iter;
+    XML_Content    *expatModel;
+    TNC_Content    *model = NULL;
+    char           *name;
+
+    /* Pass 1: replace every expat content model stored in tagNames by
+       an equivalent TNC_Content tree and release the expat one. */
+    for (h = Tcl_FirstHashEntry (info->tagNames, &iter);
+         h != NULL;
+         h = Tcl_NextHashEntry (&iter)) {
+#ifdef TNC_DEBUG
+        printf ("name: %-20s nameId: %p\n",
+                Tcl_GetHashKey (info->tagNames, h),
+                h);
+#endif
+        expatModel = (XML_Content*) Tcl_GetHashValue (h);
+        model = (TNC_Content*) MALLOC (sizeof (TNC_Content));
+        TncRewriteModel (expatModel, model, info->tagNames);
+        /* Attach the attribute info stored for this element, if any. */
+        name = Tcl_GetHashKey (info->tagNames, h);
+        attDefs = Tcl_FindHashEntry (info->attDefsTables, name);
+        model->attInfo = attDefs
+            ? (TNC_ElemAttInfo *) Tcl_GetHashValue (attDefs)
+            : NULL;
+        Tcl_SetHashValue (h, model);
+        XML_MemFree (info->parser, expatModel);
+    }
+    info->elemContentsRewriten = 1;
+    /* Pass 2: every notation name that was used must have a value,
+       i.e. must in fact have been declared. */
+    for (h = Tcl_FirstHashEntry (info->notationDecls, &iter);
+         h != NULL;
+         h = Tcl_NextHashEntry (&iter)) {
+#ifdef TNC_DEBUG
+        printf ("check notation name %s\n",
+                Tcl_GetHashKey (info->notationDecls, h));
+        printf ("value %p\n", Tcl_GetHashValue (h));
+#endif
+        if (Tcl_GetHashValue (h)) continue;
+        signalNotValid (userData, TNC_ERROR_NOTATION_MUST_BE_DECLARED);
+        return;
+    }
+    /* Pass 3: the same check for every entity name that was used. */
+    for (h = Tcl_FirstHashEntry (info->entityDecls, &iter);
+         h != NULL;
+         h = Tcl_NextHashEntry (&iter)) {
+        if (Tcl_GetHashValue (h)) continue;
+        signalNotValid (userData,
+                        TNC_ERROR_ATT_ENTITY_DEFAULT_MUST_BE_DECLARED);
+        return;
+    }
+    info->dtdstatus = 1;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncProbeChildModel --
+ *
+ *      Helper of TncProbeElement: pushes child number childIdx of the
+ *      content model at stack position parentPos onto the content
+ *      stack and probes the element against that child model.
+ *
+ * Results:
+ *      The result of the nested TncProbeElement call (1, 0 or -1).
+ *
+ * Side effects:
+ *      The pushed entry stays on the stack. The stack may be grown
+ *      and moved, so pointers into it are stale afterwards.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static int TncProbeElement (TNC_NameId *nameId, domReadInfo *tncdata);
+static int
+TncProbeChildModel (TNC_NameId *nameId, domReadInfo *tncdata,
+                    int parentPos, unsigned int childIdx)
+{
+    TNC_ContentStack *parent, *top;
+
+    if (tncdata->contentStackPtr == tncdata->contentStackSize) {
+        /* Size the block by stack entries, not by pointers. */
+        tncdata->contentStackSize *= 2;
+        tncdata->contentStack = (TNC_ContentStack *)
+            Tcl_Realloc ((char *)tncdata->contentStack,
+                sizeof (TNC_ContentStack) * tncdata->contentStackSize);
+    }
+    parent = &tncdata->contentStack[parentPos];
+    top = &tncdata->contentStack[tncdata->contentStackPtr++];
+    top->model = &parent->model->children[childIdx];
+    top->activeChild = 0;
+    top->deep = parent->deep + 1;
+    top->alreadymatched = 0;
+    return TncProbeElement (nameId, tncdata);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncProbeElement --
+ *
+ *      This function checks, if the element match the
+ *      topmost content model on the content stack.
+ *
+ * Results:
+ *      1 if the element match,
+ *      0 if not.
+ *      -1 if not, but this isn't a validation error
+ *
+ * Side effects:
+ *      Eventually pushes data to the contentStack (even in
+ *      recursive calls).
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static int
+TncProbeElement (
+    TNC_NameId *nameId,
+    domReadInfo *tncdata
+)
+{
+    TNC_ContentStack *stackelm;
+    TNC_Content *activeModel;
+    int myStackPtr, zeroMatchPossible, result;
+    unsigned int i, seqstartindex;
+
+#ifdef TNC_DEBUG
+    printf ("TncProbeElement start\n");
+    printContentStack (tncdata);
+#endif
+    myStackPtr = tncdata->contentStackPtr - 1;
+    stackelm = &(tncdata->contentStack)[myStackPtr];
+    switch (stackelm->model->type) {
+    case XML_CTYPE_MIXED:
+#ifdef TNC_DEBUG
+        printf ("TncProbeElement XML_CTYPE_MIXED\n");
+#endif
+        for (i = 0; i < stackelm->model->numchildren; i++) {
+            if ((&stackelm->model->children[i])->nameId == nameId) {
+                return 1;
+            }
+        }
+        return 0;
+    case XML_CTYPE_ANY:
+#ifdef TNC_DEBUG
+        printf ("TncProbeElement XML_CTYPE_ANY\n");
+#endif
+        return 1;
+    case XML_CTYPE_EMPTY:
+#ifdef TNC_DEBUG
+        printf ("TncProbeElement XML_CTYPE_EMPTY\n");
+#endif
+        return 0;
+    case XML_CTYPE_CHOICE:
+#ifdef TNC_DEBUG
+        printf ("TncProbeElement XML_CTYPE_CHOICE\n");
+#endif
+        if (stackelm->alreadymatched) {
+            activeModel = &stackelm->model->children[stackelm->activeChild];
+            if (activeModel->type == XML_CTYPE_NAME) {
+                /* so this stackelement must be the topmost */
+                if (activeModel->quant == XML_CQUANT_REP
+                    || activeModel->quant == XML_CQUANT_PLUS) {
+                    /* the last matched element is multiple, maybe it
+                       matches again */
+                    if (nameId == activeModel->nameId) {
+#ifdef TNC_DEBUG
+                        printf ("-->matched! child Nr. %d\n",
+                                stackelm->activeChild);
+#endif
+                        /* stack and activeChild nr. are already OK, just
+                           report success. */
+                        return 1;
+                    }
+                }
+            }
+            /* The active child is a SEQ or CHOICE. */
+            if (stackelm->model->quant == XML_CQUANT_NONE ||
+                stackelm->model->quant == XML_CQUANT_OPT) {
+                /*The child cp's type SEQ or CHOICE keep track by
+                  themselve about if they are repeated. Because we are
+                  here, they don't. Since the current cp has already
+                  matched and isn't multiple, the current cp as a whole
+                  is done. But no contradiction detected, so return
+                  "search further" */
+                return -1;
+            }
+        }
+
+        /* If one of the alternatives within the CHOICE cp is quant
+           REP or OPT, it isn't a contradition to the document structure,
+           if the cp doesn't match, even if it is quant
+           NONE or PLUS, because of the "zero time" match of this one
+           alternative. We use zeroMatchPossible, to know about this.*/
+        zeroMatchPossible = 0;
+        for (i = 0; i < stackelm->model->numchildren; i++) {
+            if ((&stackelm->model->children[i])->type == XML_CTYPE_NAME) {
+#ifdef TNC_DEBUG
+                printf ("child is type NAME\n");
+#endif
+                if ((&stackelm->model->children[i])->nameId == nameId) {
+#ifdef TNC_DEBUG
+                    printf ("-->matched! child Nr. %d\n",i);
+#endif
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    (&tncdata->contentStack[myStackPtr])->alreadymatched = 1;
+                    return 1;
+                }
+                else {
+                    /* If the name child is optional, we have a
+                       candidat for "zero match". */
+                    if ((&stackelm->model->children[i])->quant
+                        == XML_CQUANT_OPT ||
+                        (&stackelm->model->children[i])->quant
+                        == XML_CQUANT_REP) {
+#ifdef TNC_DEBUG
+                        printf ("zero match possible\n");
+#endif
+                        zeroMatchPossible = 1;
+                    }
+                }
+            }
+            else {
+#ifdef TNC_DEBUG
+                printf ("complex child type\n");
+#endif
+                result = TncProbeChildModel (nameId, tncdata, myStackPtr, i);
+                /* The nested probe may have moved the stack. */
+                stackelm = &tncdata->contentStack[myStackPtr];
+                if (result == 1) {
+#ifdef TNC_DEBUG
+                    printf ("-->matched! child nr. %d\n",i);
+#endif
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    (&tncdata->contentStack[myStackPtr])->alreadymatched = 1;
+                    return 1;
+                }
+                /* The child cp says, it doesn't has matched, but says
+                   also, it's perfectly OK, if it doesn't at all. So we
+                   have a candidat for "zero match". */
+                if (result == -1) {
+                    zeroMatchPossible = 1;
+                }
+                tncdata->contentStackPtr--;
+            }
+        }
+        /* OK, nobody has claimed a match. Question is: try further or is
+           this a document structure error. */
+        if (zeroMatchPossible ||
+            stackelm->alreadymatched ||
+            stackelm->model->quant == XML_CQUANT_REP ||
+            stackelm->model->quant == XML_CQUANT_OPT) {
+            return -1;
+        }
+#ifdef TNC_DEBUG
+        printf ("validation error\n");
+#endif
+        return 0;
+    case XML_CTYPE_SEQ:
+#ifdef TNC_DEBUG
+        printf ("TncProbeElement XML_CTYPE_SEQ\n");
+#endif
+        if (stackelm->alreadymatched) {
+            activeModel = &stackelm->model->children[stackelm->activeChild];
+            if (activeModel->type == XML_CTYPE_NAME) {
+                /* so this stackelement must be the topmost */
+                if (activeModel->quant == XML_CQUANT_REP
+                    || activeModel->quant == XML_CQUANT_PLUS) {
+                    /* the last matched element is multiple, maybe it
+                       matches again */
+                    if (nameId == activeModel->nameId) {
+#ifdef TNC_DEBUG
+                        printf ("-->matched! child Nr. %d\n",
+                                stackelm->activeChild);
+#endif
+                        /* stack and activeChild nr. are already OK, just
+                           report success. */
+                        return 1;
+                    }
+                }
+            }
+        }
+
+        if (stackelm->alreadymatched) {
+            seqstartindex = stackelm->activeChild + 1;
+        }
+        else {
+            seqstartindex = 0;
+        }
+        /* This time zeroMatchPossible flags, if every of the remaining
+           childs - that may every child, if !alreadymatched - doesn't
+           must occur. We assume, the (outstanding childs of, in case
+           of alreadymatched) current stackelement model has only
+           optional childs, and set to wrong, if we find any
+           non-optional child */
+        zeroMatchPossible = 1;
+        for (i = seqstartindex; i < stackelm->model->numchildren; i++) {
+            if ((&stackelm->model->children[i])->type == XML_CTYPE_NAME) {
+                if ((&stackelm->model->children[i])->nameId == nameId) {
+#ifdef TNC_DEBUG
+                    printf ("-->matched! child Nr. %d\n",i);
+#endif
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    (&tncdata->contentStack[myStackPtr])->alreadymatched = 1;
+                    return 1;
+                } else if ((&stackelm->model->children[i])->quant
+                           == XML_CQUANT_NONE
+                           || (&stackelm->model->children[i])->quant
+                           == XML_CQUANT_PLUS) {
+                    zeroMatchPossible = 0;
+                    break;
+                }
+            } else {
+                result = TncProbeChildModel (nameId, tncdata, myStackPtr, i);
+                /* The nested probe may have moved the stack. */
+                stackelm = &tncdata->contentStack[myStackPtr];
+                if (result == 1) {
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    (&tncdata->contentStack[myStackPtr])->alreadymatched = 1;
+                    return 1;
+                }
+                tncdata->contentStackPtr--;
+                if (result == 0) {
+                    zeroMatchPossible = 0;
+                    break;
+                }
+            }
+        }
+        if (!stackelm->alreadymatched) {
+            if (zeroMatchPossible) {
+                /* The stackelm hasn't matched, but don't have to
+                   after all. Return try further */
+                return -1;
+            } else {
+                /* No previous match, but at least one child is
+                   necessary. Return depends of the quant of the
+                   entire seq */
+                if (stackelm->model->quant == XML_CQUANT_NONE ||
+                    stackelm->model->quant == XML_CQUANT_PLUS) {
+                    /* DTD claims, the seq as to be there, but isn't */
+                    return 0;
+                } else {
+                    /* The seq is optional */
+                    return -1;
+                }
+            }
+        }
+        if (stackelm->alreadymatched) {
+            if (!zeroMatchPossible) {
+                /* Some child at the start of the seq has matched in
+                   the past, but since zeroMatchPossible has changed
+                   to zero, there must be a non-matching non-optional
+                   child later. Error in document structure. */
+                return 0;
+            } else {
+                /* OK, SEQ has matched befor. But after the last match, there
+                   where no required (quant NONE or PLUS) childs. */
+                if (stackelm->model->quant == XML_CQUANT_NONE ||
+                    stackelm->model->quant == XML_CQUANT_OPT) {
+                    /* The entire seq isn't multiple. Just look further. */
+                    return -1;
+                }
+            }
+        }
+        /* The last untreated case is alreadymatched true,
+           zeroMatchPossible (of the rest of the seq childs after the
+           last match) true and the entire seq may be
+           multiple. Therefore start again with activeChild = 0, to
+           see, if the current nameId starts a repeated match of the
+           seq. By the way: zeroMatchPossible still has initial value
+           1, therefor no second initialiation is needed */
+        for (i = 0; i < seqstartindex; i++) {
+            if ((&stackelm->model->children[i])->type == XML_CTYPE_NAME) {
+                if ((&stackelm->model->children[i])->nameId == nameId) {
+#ifdef TNC_DEBUG
+                    printf ("-->matched! child Nr. %d\n",i);
+#endif
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    (&tncdata->contentStack[myStackPtr])->alreadymatched = 1;
+                    return 1;
+                } else if ((&stackelm->model->children[i])->quant
+                           == XML_CQUANT_NONE
+                           || (&stackelm->model->children[i])->quant
+                           == XML_CQUANT_PLUS) {
+                    zeroMatchPossible = 0;
+                    break;
+                }
+            } else {
+                result = TncProbeChildModel (nameId, tncdata, myStackPtr, i);
+                /* The nested probe may have moved the stack. */
+                stackelm = &tncdata->contentStack[myStackPtr];
+                if (result == 1) {      /* -1 means "no match, look further", not success */
+                    (&tncdata->contentStack[myStackPtr])->activeChild = i;
+                    /* alreadymatched is already 1 */
+                    return 1;
+                }
+                tncdata->contentStackPtr--;
+                if (result == 0) {
+                    /* OK, the seq doesn't match again. But since it have
+                       already matched, this isn't return 0 but.. */
+                    return -1;
+                }
+            }
+        }
+        /* seq doesn't match again and every seq child from the very first
+           up to (not including) the last match aren't required. This last
+           fact may be nice to know, but after all since the entire seq have
+           matched already ... */
+        return -1;
+    case XML_CTYPE_NAME:
+        /* NAME type dosen't occur at top level of a content model and is
+           handled in some "shotcut" way directly in the CHOICE and SEQ cases.
+           It's only here to pacify gcc -Wall. */
+        printf ("error!!! - in TncProbeElement: XML_CTYPE_NAME shouldn't reached in any case.\n");
+    default:
+        printf ("error!!! - in TncProbeElement: unknown content type: %d\n",
+                stackelm->model->type);
+    }
+    /* not reached */
+    printf ("error!!! - in TncProbeElement: end of function reached.\n");
+    return 0;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncProbeAttribute --
+ *
+ * This function checks, if the given attribute
+ * and it's value are allowed for this element.
+ *
+ * Results:
+ * 1 if the attribute name/value is OK,
+ * 0 if not.
+ *
+ * Side effects:
+ * Eventually increments the required attributes counter.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static int
+TncProbeAttribute (
+ void *userData,
+ Tcl_HashTable *elemAtts,
+ char *attrName,
+ char *attrValue,
+ int *nrOfreq
+)
+{
+ domReadInfo *tncdata = (domReadInfo *) userData;
+ Tcl_HashEntry *entryPtr;
+ TNC_AttDecl *attDecl;
+ char *pc, *copy, save;
+ int clen, i, start, hnew;
+ TNC_EntityInfo *entityInfo;
+
+ entryPtr = Tcl_FindHashEntry (elemAtts, attrName);
+ if (!entryPtr) {
+ signalNotValid (userData, TNC_ERROR_UNKNOWN_ATTRIBUTE);
+ return 0;
+ }
+ /* NOTE: attribute uniqueness per element is a wellformed
+ constrain and therefor done by expat. */
+ attDecl = (TNC_AttDecl *) Tcl_GetHashValue (entryPtr);
+ switch (attDecl->att_type) {
+ case TNC_ATTTYPE_CDATA:
+ if (attDecl->isrequired && attDecl->dflt) {
+ if (strcmp (attDecl->dflt, attrValue) != 0) {
+ signalNotValid (userData,
+ TNC_ERROR_WRONG_FIXED_ATTVALUE);
+ return 0;
+ }
+ }
+ break;
+
+ case TNC_ATTTYPE_ID:
+ pc = (char*)attrValue;
+ clen = UTF8_CHAR_LEN (*pc);
+ CHECK_UTF_CHARLENR (clen);
+ if (!UTF8_GET_NAME_START (pc, clen)) {
+ signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+ }
+ pc += clen;
+ while (1) {
+ if (*pc == '\0') {
+ break;
+ }
+ clen = UTF8_CHAR_LEN (*pc);
+ CHECK_UTF_CHARLENR (clen);
+ if (!UTF8_GET_NAMING_NMTOKEN (pc, clen)) {
+ signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+ return 0;
+ }
+ pc += clen;
+ }
+ entryPtr = Tcl_CreateHashEntry (tncdata->ids, attrValue, &hnew);
+ if (!hnew) {
+ if (Tcl_GetHashValue (entryPtr)) {
+ signalNotValid (userData,
+ TNC_ERROR_DUPLICATE_ID_VALUE);
+ return 0;
+ }
+ }
+ Tcl_SetHashValue (entryPtr, (char *) 1);
+ break;
+
+ case TNC_ATTTYPE_IDREF:
+ /* Name type constraint "implicit" checked. If the
+ referenced ID exists, the type must be OK, because the
+ type of the ID's within the document are checked.
+ If there isn't such an ID, it's an error anyway. */
+ if (attrValue[0] == '\0') {
+ signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+ return 0;
+ }
+ entryPtr = Tcl_CreateHashEntry (tncdata->ids, attrValue, &hnew);
+ break;
+
+ case TNC_ATTTYPE_IDREFS:
+ if (attrValue[0] == '\0') {
+ signalNotValid (userData, TNC_ERROR_NAMES_REQUIRED);
+ return 0;
+ }
+ /* Due to attribute value normalization (xml rec 3.3.3) this
+ is a simple list "ref ref ref ..." without leading or
+ trailing spaces and exact one space between the refs. */
+ start = i = 0;
+ while (attrValue[i]) {
+ if (attrValue[i] == ' ') {
+ save = attrValue[i];
+ attrValue[i] = '\0';
+ entryPtr = Tcl_CreateHashEntry (tncdata->ids,
+ &attrValue[start], &hnew);
+ attrValue[i] = save;
+ start = ++i;
+ continue;
+ }
+ i++;
+ }
+ entryPtr = Tcl_CreateHashEntry (tncdata->ids, &attrValue[start],
+ &hnew);
+ break;
+
+ case TNC_ATTTYPE_ENTITY:
+ /* There is a validity constraint requesting entity attributes
+ values to be type Name. But if there would be an entity
+ declaration that doesn't fit this constraint, expat would
+ have already complained about the definition. So we go the
+ easy way and just look up the att value. If it's declared,
+ type must be OK, if not, it's an error anyway. */
+ entryPtr = Tcl_FindHashEntry (tncdata->entityDecls, attrValue);
+ if (!entryPtr) {
+ signalNotValid (userData, TNC_ERROR_ENTITY_ATTRIBUTE);
+ return 0;
+ }
+ entityInfo = (TNC_EntityInfo *) Tcl_GetHashValue (entryPtr);
+ if (!entityInfo->is_notation) {
+ signalNotValid (userData, TNC_ERROR_ENTITY_ATTRIBUTE);
+ return 0;
+ }
+ break;
+
+ case TNC_ATTTYPE_ENTITIES:
+ /* Normalized by exapt; for type see comment to
+ TNC_ATTTYPE_ENTITY */
+ copy = tdomstrdup (attrValue);
+ start = i = 0;
+ while (1) {
+ if (copy[i] == '\0') {
+ entryPtr = Tcl_FindHashEntry (tncdata->entityDecls,
+                                                  &copy[start]);
+ if (!entryPtr) {
+ signalNotValid (userData, TNC_ERROR_ENTITIES_ATTRIBUTE);
+ FREE (copy);
+ return 0;
+ }
+ entityInfo = (TNC_EntityInfo *) Tcl_GetHashValue (entryPtr);
+ if (!entityInfo->is_notation) {
+ signalNotValid (userData, TNC_ERROR_ENTITIES_ATTRIBUTE);
+ FREE (copy);
+ return 0;
+ }
+ FREE (copy);
+ break;
+ }
+ if (copy[i] == ' ') {
+ copy[i] = '\0';
+ entryPtr = Tcl_FindHashEntry (tncdata->entityDecls,
+ ©[start]);
+ if (!entryPtr) {
+ signalNotValid (userData, TNC_ERROR_ENTITIES_ATTRIBUTE);
+ FREE (copy);
+ return 0;
+ }
+ entityInfo = (TNC_EntityInfo *) Tcl_GetHashValue (entryPtr);
+ if (!entityInfo->is_notation) {
+ signalNotValid (userData, TNC_ERROR_ENTITIES_ATTRIBUTE);
+ FREE (copy);
+ return 0;
+ }
+ start = ++i;
+ continue;
+ }
+ i++;
+ }
+ break;
+
+ case TNC_ATTTYPE_NMTOKEN:
+ /* We assume, that the UTF-8 representation of the value is
+ valid (no partial chars, minimum encoding). This makes
+ things a little more easy and faster. I guess (but
+ haven't deeply checked - QUESTION -), expat would have
+ already complained otherwise. */
+ pc = (char*)attrValue;
+ clen = 0;
+ while (1) {
+ if (*pc == '\0') {
+ break;
+ }
+ clen = UTF8_CHAR_LEN (*pc);
+ CHECK_UTF_CHARLENR (clen);
+ if (!UTF8_GET_NAMING_NMTOKEN (pc, clen)) {
+ signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+ return 0;
+ }
+ pc += clen;
+ }
+ if (!clen)
+ signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+ break;
+
+ case TNC_ATTTYPE_NMTOKENS:
+ pc = (char*)attrValue;
+ clen = 0;
+ while (1) {
+ if (*pc == '\0') {
+ break;
+ }
+ /* NMTOKENS are normalized by expat, so this should
+ be secure. */
+ if (*pc == ' ') {
+ pc++;
+ }
+ clen = UTF8_CHAR_LEN (*pc);
+ CHECK_UTF_CHARLENR (clen);
+ if (!UTF8_GET_NAMING_NMTOKEN (pc, clen)) {
+ signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+ return 0;
+ }
+ pc += clen;
+ }
+ if (!clen)
+ signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+ break;
+
+ case TNC_ATTTYPE_NOTATION:
+ entryPtr = Tcl_FindHashEntry (attDecl->lookupTable, attrValue);
+ if (!entryPtr) {
+ signalNotValid (userData, TNC_ERROR_NOTATION_REQUIRED);
+ return 0;
+ }
+ break;
+
+ case TNC_ATTTYPE_ENUMERATION:
+ if (!Tcl_FindHashEntry (attDecl->lookupTable, attrValue)) {
+ signalNotValid (userData, TNC_ERROR_ENUM_ATT_WRONG_VALUE);
+ return 0;
+ }
+ break;
+ }
+
+ if (attDecl->isrequired) {
+ (*nrOfreq)++;
+ }
+
+ return 1;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncProbeElementEnd --
+ *
+ *      Checks whether the content model on top of the content stack
+ *      allows the current element to end here.
+ *
+ * Results:
+ *      1 if element end is OK, 0 if not.
+ *
+ * Side effects:
+ *      Nested CHOICE/SEQ children are pushed temporarily and popped
+ *      again before returning, so contentStackPtr is left unchanged.
+ *      The content stack may be reallocated (doubled) while probing.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static int
+TncProbeElementEnd (
+    domReadInfo *tncdata
+)
+{
+    TNC_ContentStack stackelm;
+    unsigned int i;
+    int zeroMatchPossible, seqstartindex;
+    /* Work on a copy of the top entry; the stack may move on realloc. */
+    stackelm = tncdata->contentStack[tncdata->contentStackPtr - 1];
+    switch (stackelm.model->type) {
+    case XML_CTYPE_MIXED:
+    case XML_CTYPE_ANY:
+    case XML_CTYPE_EMPTY:
+        return 1;
+    case XML_CTYPE_CHOICE:
+        if (stackelm.alreadymatched) {
+            return 1;
+        }
+        /* An optional or repeatable choice may match nothing at all. */
+        if (stackelm.model->quant == XML_CQUANT_REP ||
+            stackelm.model->quant == XML_CQUANT_OPT) {
+            return 1;
+        }
+        zeroMatchPossible = 0;
+        for (i = 0; i < stackelm.model->numchildren; i++) {
+            if ((&stackelm.model->children[i])->type == XML_CTYPE_NAME) {
+                if ((&stackelm.model->children[i])->quant == XML_CQUANT_OPT ||
+                    (&stackelm.model->children[i])->quant == XML_CQUANT_REP) {
+                    zeroMatchPossible = 1;
+                    break;
+                }
+            }
+            else {
+                if (tncdata->contentStackPtr == tncdata->contentStackSize) {
+                    tncdata->contentStack = (TNC_ContentStack *)
+                        Tcl_Realloc ((char *)tncdata->contentStack,
+                                     sizeof (TNC_ContentStack) * 2 *
+                                     tncdata->contentStackSize);
+                    tncdata->contentStackSize *= 2;
+                }
+                (&tncdata->contentStack[tncdata->contentStackPtr])->model
+                    = &stackelm.model->children[i];
+                tncdata->contentStack[tncdata->contentStackPtr].activeChild
+                    = 0;
+                tncdata->contentStack[tncdata->contentStackPtr].deep
+                    = stackelm.deep + 1;
+                tncdata->contentStack[tncdata->contentStackPtr].alreadymatched
+                    = 0;
+                tncdata->contentStackPtr++;
+                if (TncProbeElementEnd (tncdata)) {
+                    zeroMatchPossible = 1;
+                    tncdata->contentStackPtr--;
+                    break;
+                }
+                tncdata->contentStackPtr--;
+            }
+        }
+        if (zeroMatchPossible) {
+            return 1;
+        } else {
+            return 0;
+        }
+    case XML_CTYPE_SEQ:
+        if (!stackelm.alreadymatched) {
+            if (stackelm.model->quant == XML_CQUANT_REP ||
+                stackelm.model->quant == XML_CQUANT_OPT) {
+                return 1;
+            }
+        }
+        if (!stackelm.alreadymatched) {
+            seqstartindex = 0;
+        }
+        else {
+            seqstartindex = stackelm.activeChild + 1;
+        }
+        for (i = seqstartindex; i < stackelm.model->numchildren; i++) {
+            if ((&stackelm.model->children[i])->type == XML_CTYPE_NAME) {
+                if ((&stackelm.model->children[i])->quant == XML_CQUANT_OPT ||
+                    (&stackelm.model->children[i])->quant == XML_CQUANT_REP) {
+                    continue;
+                } else {
+                    return 0;
+                }
+            } else {
+                if (tncdata->contentStackPtr == tncdata->contentStackSize) {
+                    tncdata->contentStack = (TNC_ContentStack *)
+                        Tcl_Realloc ((char *)tncdata->contentStack,
+                                     sizeof (TNC_ContentStack) * 2 *
+                                     tncdata->contentStackSize);
+                    tncdata->contentStackSize *= 2;
+                }
+                (&tncdata->contentStack[tncdata->contentStackPtr])->model
+                    = &stackelm.model->children[i];
+                tncdata->contentStack[tncdata->contentStackPtr].activeChild
+                    = 0;
+                tncdata->contentStack[tncdata->contentStackPtr].deep
+                    = stackelm.deep + 1;
+                tncdata->contentStack[tncdata->contentStackPtr].alreadymatched
+                    = 0;
+                tncdata->contentStackPtr++;
+                if (TncProbeElementEnd (tncdata)) {
+                    tncdata->contentStackPtr--;
+                    continue;
+                }
+                else {
+                    tncdata->contentStackPtr--;
+                    return 0;
+                }
+            }
+        }
+        return 1;
+    case XML_CTYPE_NAME:
+        /* NAME type doesn't occur at top level of a content model and is
+           handled in a "shortcut" way directly in the CHOICE and SEQ cases.
+           Only here to pacify gcc -Wall; falls through to the default. */
+        fprintf (stderr, "error!!! - in TncProbeElementEnd: XML_CTYPE_NAME "
+                 "shouldn't be reached in any case.\n");
+    default:
+        fprintf (stderr, "error!!! - in TncProbeElementEnd: unknown content "
+                 "type: %d\n", stackelm.model->type);
+        return 1;
+    }
+}
+#endif
/*---------------------------------------------------------------------------
| startElement
|
\--------------------------------------------------------------------------*/
@@ -1163,10 +2317,149 @@
Tcl_ResetResult (info->interp);
}
}
DispatchPCDATA (info);
+
+#ifndef TDOM_NO_DTD_VALIDATION
+    if (info->dtdvalidation) {
+        domReadInfo *tncdata = info;
+        Tcl_HashEntry *entryPtr;
+        Tcl_HashTable *elemAtts;
+        const char **atPtr;
+        TNC_ElemAttInfo *elemAttInfo;
+        TNC_Content *model;
+        int result, nrOfreq, acceptNoDoctype = 0;
+        /* Validate this start tag against the collected DTD data. */
+#ifdef TNC_DEBUG
+        printf ("TncElementStartCommand name: %s\n", name);
+#endif
+
+        /* If the document doesn't have a doctype declaration, but the
+           user has used the -useForeignDTD 1 feature, the data collected
+           from the provided DTD hasn't been postprocessed yet
+           (elemContentsRewriten is still unset). We do this now.
+           NOTE: Since there wasn't a doctype declaration, there is no
+           information available which element is expected to be the
+           document element. Eventually it would be desirable to set
+           this somehow. For now, this means that every valid subtree
+           of the given DTD information is accepted. */
+        if (!tncdata->contentStackPtr && !tncdata->elemContentsRewriten) {
+            TncEndDoctypeDeclHandler (userData);
+            acceptNoDoctype = 1;
+        }
+
+        entryPtr = Tcl_FindHashEntry (tncdata->tagNames, name);
+        if (!entryPtr) {
+            signalNotValid (userData, TNC_ERROR_UNKNOWN_ELEMENT);
+            return;
+        }
+        model = (TNC_Content *) Tcl_GetHashValue (entryPtr);
+
+        switch (model->type) {
+        case XML_CTYPE_MIXED:
+        case XML_CTYPE_ANY:
+            tncdata->skipWhiteCDATAs = 1;
+            tncdata->ignorePCDATA = 1;
+            break;
+        case XML_CTYPE_EMPTY:
+            tncdata->skipWhiteCDATAs = 0;
+            break;
+        case XML_CTYPE_CHOICE:
+        case XML_CTYPE_SEQ:
+            tncdata->skipWhiteCDATAs = 1;
+            tncdata->ignorePCDATA = 0;
+            break;
+        case XML_CTYPE_NAME:
+            break;
+        }
+
+        if (tncdata->contentStackPtr) {
+            /* This is the normal case, within some content,
+               at least the root element content. */
+            while (1) {
+                result = TncProbeElement (entryPtr, tncdata);
+                if (result == -1) {
+                    if (tncdata->contentStack[tncdata->contentStackPtr - 1].deep
+                        == 0) {
+                        signalNotValid (userData,
+                                        TNC_ERROR_ELEMENT_NOT_ALLOWED_HERE);
+                        return;
+                    }
+                    tncdata->contentStackPtr--;
+                    continue;
+                }
+                if (result) {
+                    break;
+                }
+                if (!result) {
+                    signalNotValid (userData, TNC_ERROR_ELEMENT_NOT_ALLOWED_HERE);
+                    return;
+                }
+            }
+            if (tncdata->contentStackPtr == tncdata->contentStackSize) {
+                tncdata->contentStackSize *= 2;
+                tncdata->contentStack = (TNC_ContentStack *)
+                    Tcl_Realloc ((char *)tncdata->contentStack,
+                      sizeof (TNC_ContentStack)*tncdata->contentStackSize);
+            }
+            (&tncdata->contentStack[tncdata->contentStackPtr])->model = model;
+            (&tncdata->contentStack[tncdata->contentStackPtr])->activeChild = 0;
+            (&tncdata->contentStack[tncdata->contentStackPtr])->deep = 0;
+            (&tncdata->contentStack[tncdata->contentStackPtr])->alreadymatched = 0;
+            tncdata->contentStackPtr++;
+        } else {
+            /* This is only in case of the root element */
+            if (atts) {
+                if (!tncdata->doctypeName) {
+                    if (!acceptNoDoctype) {
+                        signalNotValid (userData, TNC_ERROR_NO_DOCTYPE_DECL);
+                        return;
+                    }
+                } else {
+                    if (strcmp (tncdata->doctypeName, name) != 0) {
+                        signalNotValid (userData, TNC_ERROR_WRONG_ROOT_ELEMENT);
+                        return;
+                    }
+                }
+            }
+            (&(tncdata->contentStack)[0])->model = model;
+            (&(tncdata->contentStack)[0])->activeChild = 0;
+            (&(tncdata->contentStack)[0])->deep = 0;
+            (&(tncdata->contentStack)[0])->alreadymatched = 0;
+            tncdata->contentStackPtr++;
+        }
+
+        if (atts) {
+            elemAttInfo = model->attInfo;
+            if (!elemAttInfo) {
+                if (atts[0] != NULL) {
+                    signalNotValid (userData, TNC_ERROR_NO_ATTRIBUTES);
+                    return;
+                }
+            } else {
+                elemAtts = elemAttInfo->attributes;
+                nrOfreq = 0;
+                for (atPtr = atts; atPtr[0]; atPtr += 2) {
+                    if (!TncProbeAttribute (userData, elemAtts, (char *) atPtr[0],
+                                            (char *) atPtr[1], &nrOfreq))
+                        return;
+                }
+                if (nrOfreq != elemAttInfo->nrOfreq) {
+                    signalNotValid (userData,
+                                    TNC_ERROR_MISSING_REQUIRED_ATTRIBUTE);
+                    return;
+                }
+            }
+        } else {
+            tncdata->elemAttInfo = model->attInfo;
+        }
+#ifdef TNC_DEBUG
+        printf ("TncElementStartCommand end\n");
+#endif
+    }
+#endif
h = Tcl_CreateHashEntry(&HASHTAB(info->document,tdom_tagNames), name,
&hnew);
if (info->storeLineColumn) {
node = (domNode*) domAlloc(sizeof(domNode)
@@ -1442,10 +2735,80 @@
)
{
domReadInfo *info = userData;
DispatchPCDATA (info);
+
+#ifndef TDOM_NO_DTD_VALIDATION
+ if (info->dtdvalidation) {
+ domReadInfo *tncdata = info;
+ Tcl_HashEntry *entryPtr;
+ Tcl_HashSearch search;
+ /* Validate that the element being closed is allowed to end here. */
+#ifdef TNC_DEBUG
+ printf ("TncElementEndCommand start\n");
+ printContentStack (tncdata);
+#endif
+ /* Calling XML_StopParser() in an element start event doesn't
+ * always prevent expat from calling the element end handler.
+ * (Yes, it's an expat thing, raised a few bugs upstream.) */
+ if (tncdata->status == TCL_ERROR) return;
+ while (1) { /* pop nested entries until the element's own (deep == 0) */
+ if (!TncProbeElementEnd (tncdata)) {
+ signalNotValid (userData, TNC_ERROR_ELEMENT_CAN_NOT_END_HERE);
+ return;
+ }
+ if (tncdata->contentStack[tncdata->contentStackPtr - 1].deep == 0) {
+ break;
+ }
+ tncdata->contentStackPtr--;
+ }
+ /* Remove the content model of the closed element from the stack */
+ tncdata->contentStackPtr--;
+#ifdef TNC_DEBUG
+ printf ("after removing ended element from the stack\n");
+ printContentStack (tncdata);
+#endif
+ if (tncdata->contentStackPtr) { /* back inside the parent's content */
+ switch ((&tncdata->contentStack[tncdata->contentStackPtr - 1])->model->type) {
+ case XML_CTYPE_MIXED:
+ case XML_CTYPE_ANY:
+ tncdata->skipWhiteCDATAs = 1;
+ tncdata->ignorePCDATA = 1;
+ break;
+ case XML_CTYPE_EMPTY:
+ tncdata->skipWhiteCDATAs = 0;
+ break;
+ case XML_CTYPE_CHOICE:
+ case XML_CTYPE_SEQ:
+ case XML_CTYPE_NAME:
+ tncdata->skipWhiteCDATAs = 1;
+ tncdata->ignorePCDATA = 0;
+ break;
+ }
+ } else {
+ /* This means the root element is closed; therefore this is
+ the place to check whether every IDREF points
+ to an ID. */
+ if (tncdata->idCheck) {
+ for (entryPtr = Tcl_FirstHashEntry (tncdata->ids, &search);
+ entryPtr != NULL;
+ entryPtr = Tcl_NextHashEntry (&search)) {
+#ifdef TNC_DEBUG
+ printf ("check id value %s\n",
+ Tcl_GetHashKey (tncdata->ids, entryPtr));
+ printf ("value %p\n", Tcl_GetHashValue (entryPtr));
+#endif
+ if (!Tcl_GetHashValue (entryPtr)) { /* presumably: referenced, never declared as ID - TODO confirm */
+ signalNotValid (userData, TNC_ERROR_UNKNOWN_ID_REFERRED);
+ return;
+ }
+ }
+ }
+ }
+ }
+#endif
info->depth--;
if (!info->ignorexmlns) {
/* pop active namespaces */
while ( (info->activeNSpos >= 0) &&
@@ -1473,17 +2836,41 @@
|
\--------------------------------------------------------------------------*/
static void
characterDataHandler (
 void *userData,
- const char *s,
+ const char *data,
 int len
)
{
 domReadInfo *info = userData;
- Tcl_DStringAppend (info->cdata, s, len);
+ Tcl_DStringAppend (info->cdata, data, len);
+#ifndef TDOM_NO_DTD_VALIDATION
+ if (info->dtdvalidation) {
+ domReadInfo *tncdata = info;
+ int i;
+ char *pc;
+ /* Check the character data against the flags set by the element handlers. */
+ if (!tncdata->skipWhiteCDATAs && len > 0) { /* flag is cleared for XML_CTYPE_EMPTY models */
+ signalNotValid (userData, TNC_ERROR_EMPTY_ELEMENT);
+ return;
+ }
+ if (!tncdata->ignorePCDATA) { /* only white space is accepted here */
+ for (i = 0, pc = (char*)data; i < len; i++, pc++) {
+ if ( (*pc == ' ') ||
+ (*pc == '\n') ||
+ (*pc == '\r') ||
+ (*pc == '\t') ) {
+ continue;
+ }
+ signalNotValid (userData, TNC_ERROR_DISALLOWED_PCDATA);
+ return;
+ }
+ }
+ }
+#endif
 return;
}
/*---------------------------------------------------------------------------
@@ -2120,10 +3507,583 @@
{
domReadInfo *info = (domReadInfo *) userData;
info->insideDTD = 0;
}
+
+#ifndef TDOM_NO_DTD_VALIDATION
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncElementDeclCommand --
+ *
+ *      Element declaration handler: records one declared content model.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Stores model under the element name in tagNames, or frees it.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static void
+TncElementDeclCommand (
+    void *userData,
+    const char *name,
+    XML_Content *model
+)
+{
+    domReadInfo *tncdata = (domReadInfo *) userData;
+    Tcl_HashEntry *tagEntry;
+    XML_Content *kids;
+    unsigned int first, second;
+    int isNew;
+
+    /* "No element type may be declared more than once." (rec. 3.2) */
+    tagEntry = Tcl_CreateHashEntry (tncdata->tagNames, name, &isNew);
+    if (!isNew) {
+        XML_FreeContentModel (tncdata->parser, model);
+        signalNotValid (userData, TNC_ERROR_DUPLICATE_ELEMENT_DECL);
+        return;
+    }
+
+    /* "The same name must not appear more than once in a
+       single mixed-content declaration." (rec. 3.2.2)
+       Checked with a plain pairwise comparison of the child names. */
+    if (model->type == XML_CTYPE_MIXED && model->quant == XML_CQUANT_REP) {
+        kids = model->children;
+        for (first = 0; first < model->numchildren; first++) {
+            for (second = first + 1; second < model->numchildren; second++) {
+                if (strcmp (kids[first].name, kids[second].name) != 0) {
+                    continue;
+                }
+                XML_FreeContentModel (tncdata->parser, model);
+                signalNotValid (userData,
+                                TNC_ERROR_DUPLICATE_MIXED_ELEMENT);
+                return;
+            }
+        }
+    }
+    Tcl_SetHashValue (tagEntry, model);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncAttDeclCommand --
+ *
+ *      Called for *each* attribute in an XML ATTLIST declaration; stores
+ *      the attribute definition in an element specific hash table.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Adds a TNC_AttDecl to the element's attribute table; may add
+ *      value-less entries to the notationDecls/entityDecls tables.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static void
+TncAttDeclCommand (
+    void *userData,
+    const char *elname,
+    const char *attname,
+    const char *att_type,
+    const char *dflt,
+    int isrequired
+)
+{
+    domReadInfo *tncdata = (domReadInfo *) userData;
+    Tcl_HashEntry *entryPtr, *entryPtr1;
+    Tcl_HashTable *elemAtts;
+    TNC_ElemAttInfo *elemAttInfo;
+    TNC_AttDecl *attDecl;
+    TNC_EntityInfo *entityInfo;
+    int newPtr, start, i, clen;
+    char *copy;
+
+    entryPtr = Tcl_CreateHashEntry (tncdata->attDefsTables, elname, &newPtr);
+    if (newPtr) {
+        elemAttInfo = (TNC_ElemAttInfo *) MALLOC (sizeof (TNC_ElemAttInfo));
+        elemAtts = (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable));
+        Tcl_InitHashTable (elemAtts, TCL_STRING_KEYS);
+        elemAttInfo->attributes = elemAtts;
+        elemAttInfo->nrOfreq = 0;
+        elemAttInfo->nrOfIdAtts = 0;
+        Tcl_SetHashValue (entryPtr, elemAttInfo);
+    } else {
+        elemAttInfo = (TNC_ElemAttInfo *) Tcl_GetHashValue (entryPtr);
+        elemAtts = elemAttInfo->attributes;
+    }
+    entryPtr = Tcl_CreateHashEntry (elemAtts, attname, &newPtr);
+    /* Multiple Attribute declarations are allowed, but later declarations
+       are ignored. See rec 3.3. */
+    if (newPtr) {
+        attDecl = (TNC_AttDecl *) MALLOC (sizeof (TNC_AttDecl));
+        /* Register the declaration right away, in a state that is safe
+           to free, so that none of the early error returns below leaks
+           it; TncFreeValidationData releases whatever is in the table. */
+        attDecl->att_type = TNC_ATTTYPE_CDATA;
+        attDecl->dflt = NULL;
+        Tcl_SetHashValue (entryPtr, attDecl);
+        if (strcmp (att_type, "CDATA") == 0) {
+            attDecl->att_type = TNC_ATTTYPE_CDATA;
+        }
+        else if (strcmp (att_type, "ID") == 0) {
+            if (elemAttInfo->nrOfIdAtts) {
+                signalNotValid (userData, TNC_ERROR_MORE_THAN_ONE_ID_ATT);
+                return;
+            }
+            elemAttInfo->nrOfIdAtts++;
+            if (dflt != NULL) {
+                signalNotValid (userData, TNC_ERROR_ID_ATT_DEFAULT);
+                return;
+            }
+            attDecl->att_type = TNC_ATTTYPE_ID;
+        }
+        else if (strcmp (att_type, "IDREF") == 0) {
+            attDecl->att_type = TNC_ATTTYPE_IDREF;
+        }
+        else if (strcmp (att_type, "IDREFS") == 0) {
+            attDecl->att_type = TNC_ATTTYPE_IDREFS;
+        }
+        else if (strcmp (att_type, "ENTITY") == 0) {
+            attDecl->att_type = TNC_ATTTYPE_ENTITY;
+        }
+        else if (strcmp (att_type, "ENTITIES") == 0) {
+            attDecl->att_type = TNC_ATTTYPE_ENTITIES;
+        }
+        else if (strcmp (att_type, "NMTOKEN") == 0) {
+            attDecl->att_type = TNC_ATTTYPE_NMTOKEN;
+        }
+        else if (strcmp (att_type, "NMTOKENS") == 0) {
+            attDecl->att_type = TNC_ATTTYPE_NMTOKENS;
+        }
+        else if (strncmp (att_type, "NOTATION(", 9) == 0) {
+            /* This is a bit puzzling. For a declaration such as
+                   <!ATTLIST pic type NOTATION (gif) #IMPLIED>
+               expat returns (illustrative example, see parsing below)
+               as att_type "NOTATION(gif)". */
+            attDecl->att_type = TNC_ATTTYPE_NOTATION;
+            attDecl->lookupTable =
+                (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable));
+            Tcl_InitHashTable (attDecl->lookupTable, TCL_STRING_KEYS);
+            copy = tdomstrdup (att_type);
+            start = i = 9;
+            while (1) {
+                if (copy[i] == ')') {
+                    copy[i] = '\0';
+#ifdef TNC_DEBUG
+                    printf ("att type NOTATION: notation %s allowed\n",
+                            &copy[start]);
+#endif
+                    Tcl_CreateHashEntry (attDecl->lookupTable,
+                                         &copy[start], &newPtr);
+                    entryPtr1 = Tcl_CreateHashEntry (tncdata->notationDecls,
+                                                     &copy[start], &newPtr);
+#ifdef TNC_DEBUG
+                    if (newPtr) {
+                        printf ("up to now unknown NOTATION\n");
+                    } else {
+                        printf ("NOTATION already known\n");
+                    }
+#endif
+                    FREE (copy);
+                    break;
+                }
+                if (copy[i] == '|') {
+                    copy[i] = '\0';
+#ifdef TNC_DEBUG
+                    printf ("att type NOTATION: notation %s allowed\n",
+                            &copy[start]);
+#endif
+                    Tcl_CreateHashEntry (attDecl->lookupTable,
+                                         &copy[start], &newPtr);
+                    entryPtr1 = Tcl_CreateHashEntry (tncdata->notationDecls,
+                                                     &copy[start], &newPtr);
+#ifdef TNC_DEBUG
+                    if (newPtr) {
+                        printf ("up to now unknown NOTATION\n");
+                    } else {
+                        printf ("NOTATION already known\n");
+                    }
+#endif
+                    start = ++i;
+                    continue;
+                }
+                clen = UTF8_CHAR_LEN (copy[i]);
+                CHECK_UTF_CHARLEN_COPY (clen);
+                if (!UTF8_GET_NAMING_NMTOKEN (&copy[i], clen)) {
+                    signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+                    FREE (copy);
+                    return;
+                }
+                i += clen;
+            }
+        }
+        else {
+            /* For a declaration such as (illustrative example)
+                   <!ATTLIST list type (numbered|bullets) #IMPLIED>
+               expat returns as att_type "(numbered|bullets)", i.e. in
+               some "non-official" normalized way.
+               Makes things easier for us. */
+            attDecl->att_type = TNC_ATTTYPE_ENUMERATION;
+            attDecl->lookupTable =
+                (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable));
+            Tcl_InitHashTable (attDecl->lookupTable, TCL_STRING_KEYS);
+            copy = tdomstrdup (att_type);
+            start = i = 1;
+            while (1) {
+                if (copy[i] == ')') {
+                    copy[i] = '\0';
+                    Tcl_CreateHashEntry (attDecl->lookupTable,
+                                         &copy[start], &newPtr);
+                    FREE (copy);
+                    break;
+                }
+                if (copy[i] == '|') {
+                    copy[i] = '\0';
+                    Tcl_CreateHashEntry (attDecl->lookupTable,
+                                         &copy[start], &newPtr);
+                    start = ++i;
+                    continue;
+                }
+                clen = UTF8_CHAR_LEN (copy[i]);
+                CHECK_UTF_CHARLEN_COPY (clen);
+                if (!UTF8_GET_NAMING_NMTOKEN (&copy[i], clen)) {
+                    signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+                    FREE (copy);
+                    return;
+                }
+                i += clen;
+            }
+        }
+        if (dflt != NULL) {
+            switch (attDecl->att_type) {
+            case TNC_ATTTYPE_ENTITY:
+            case TNC_ATTTYPE_IDREF:
+                clen = UTF8_CHAR_LEN (*dflt);
+                CHECK_UTF_CHARLEN (clen);
+                if (!UTF8_GET_NAME_START (dflt, clen)) {
+                    signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+                    return;
+                }
+                i = clen;
+                while (1) {
+                    if (dflt[i] == '\0') {
+                        break;
+                    }
+                    clen = UTF8_CHAR_LEN (dflt[i]);
+                    CHECK_UTF_CHARLEN (clen);
+                    if (!UTF8_GET_NAMING_NMTOKEN (&dflt[i], clen)) {
+                        signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+                        return;
+                    }
+                    i += clen;
+                }
+                if (attDecl->att_type == TNC_ATTTYPE_ENTITY) {
+                    entryPtr1 = Tcl_CreateHashEntry (tncdata->entityDecls,
+                                                     dflt, &newPtr);
+                    if (!newPtr) { /* value is NULL if only referenced so far */
+                        entityInfo =
+                            (TNC_EntityInfo *) Tcl_GetHashValue (entryPtr1);
+                        if (entityInfo && !entityInfo->is_notation) {
+                            signalNotValid (userData,TNC_ERROR_ATT_ENTITY_DEFAULT_MUST_BE_DECLARED);
+                        }
+                    }
+                }
+                break;
+            case TNC_ATTTYPE_IDREFS:
+                start = i = 0;
+                while (1) {
+                    if (dflt[i] == '\0') {
+                        break;
+                    }
+                    if (dflt[i] == ' ') {
+                        start = ++i;
+                    }
+                    if (start == i) {
+                        clen = UTF8_CHAR_LEN (dflt[i]);
+                        CHECK_UTF_CHARLEN (clen);
+                        if (!UTF8_GET_NAME_START (&dflt[i], clen)) {
+                            signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+                            return;
+                        }
+                        i += clen;
+                    }
+                    else {
+                        clen = UTF8_CHAR_LEN (dflt[i]);
+                        CHECK_UTF_CHARLEN (clen);
+                        if (!UTF8_GET_NAMING_NMTOKEN (&dflt[i], clen)) {
+                            signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+                            return;
+                        }
+                        i += clen;
+                    }
+                }
+                break;
+            case TNC_ATTTYPE_ENTITIES:
+                copy = tdomstrdup (dflt);
+                start = i = 0;
+                while (1) {
+                    if (copy[i] == '\0') { /* NOTE(review): last name is never looked up */
+                        FREE (copy);
+                        break;
+                    }
+                    if (copy[i] == ' ') {
+                        copy[i] = '\0';
+                        entryPtr1 = Tcl_CreateHashEntry (tncdata->entityDecls,
+                                                         &copy[start],
+                                                         &newPtr);
+                        if (!newPtr) { /* value is NULL if only referenced so far */
+                            entityInfo =
+                                (TNC_EntityInfo *) Tcl_GetHashValue (entryPtr1);
+                            if (entityInfo && !entityInfo->is_notation) {
+                                signalNotValid (userData,TNC_ERROR_ATT_ENTITY_DEFAULT_MUST_BE_DECLARED);
+                            }
+                        }
+                        start = ++i;
+                    }
+                    if (start == i) {
+                        clen = UTF8_CHAR_LEN (copy[i]);
+                        CHECK_UTF_CHARLEN_COPY (clen);
+                        if (!UTF8_GET_NAME_START (&copy[i], clen)) {
+                            signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+                            FREE (copy);
+                            return;
+                        }
+                        i += clen;
+                    }
+                    else {
+                        clen = UTF8_CHAR_LEN (copy[i]);
+                        CHECK_UTF_CHARLEN_COPY (clen);
+                        if (!UTF8_GET_NAMING_NMTOKEN (&copy[i], clen)) {
+                            signalNotValid (userData, TNC_ERROR_NAME_REQUIRED);
+                            FREE (copy);
+                            return;
+                        }
+                        i += clen;
+                    }
+                }
+                break;
+            case TNC_ATTTYPE_NMTOKEN:
+                i = 0;
+                while (1) {
+                    if (dflt[i] == '\0') {
+                        break;
+                    }
+                    clen = UTF8_CHAR_LEN (dflt[i]);
+                    CHECK_UTF_CHARLEN (clen);
+                    if (!UTF8_GET_NAMING_NMTOKEN (&dflt[i], clen)) {
+                        signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+                        return;
+                    }
+                    i += clen;
+                }
+                if (!i) signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+                break;
+            case TNC_ATTTYPE_NMTOKENS:
+                i = 0;
+                while (1) {
+                    if (dflt[i] == '\0') {
+                        break;
+                    }
+                    if (dflt[i] == ' ') {
+                        i++;
+                    }
+                    clen = UTF8_CHAR_LEN (dflt[i]);
+                    CHECK_UTF_CHARLEN (clen);
+                    if (!UTF8_GET_NAMING_NMTOKEN (&dflt[i], clen)) {
+                        signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+                        return;
+                    }
+                    i += clen;
+                }
+                if (!i) signalNotValid (userData, TNC_ERROR_NMTOKEN_REQUIRED);
+                break;
+            case TNC_ATTTYPE_NOTATION:
+            case TNC_ATTTYPE_ENUMERATION:
+                /* The default must be one of the declared alternatives. */
+                if (!Tcl_FindHashEntry (attDecl->lookupTable, dflt)) {
+                    signalNotValid (userData, TNC_ERROR_IMPOSSIBLE_DEFAULT);
+                    return;
+                }
+                break;
+            case TNC_ATTTYPE_CDATA:
+            case TNC_ATTTYPE_ID:
+                /* These two cases are only there to pacify -Wall.
+                   CDATA may have any allowed characters (and
+                   everything else is detected by expat). IDs are not
+                   allowed to have defaults (handled above). */
+                break;
+            }
+            attDecl->dflt = tdomstrdup (dflt);
+        }
+        if (isrequired) {
+            elemAttInfo->nrOfreq++;
+        }
+        attDecl->isrequired = isrequired;
+    }
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncNotationDeclHandler --
+ *
+ *      Notation declaration handler; invoked once per declaration.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Makes sure notationName has an entry in the notationDecls
+ *      table and sets that entry's value to one.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static void
+TncNotationDeclHandler (
+    void *userData,
+    const char *notationName,
+    const char *base,
+    const char *systemId,
+    const char *publicId
+)
+{
+    domReadInfo *tncdata = (domReadInfo *) userData;
+    Tcl_HashEntry *declEntry;
+    int isNew;
+
+#ifdef TNC_DEBUG
+    printf ("Notation %s declared\n", notationName);
+#endif
+    /* The entry may already exist; either way its value becomes one. */
+    declEntry = Tcl_CreateHashEntry (tncdata->notationDecls, notationName,
+                                     &isNew);
+    Tcl_SetHashValue (declEntry, (char *) 1);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * TncFreeTncModel --
+ *
+ *      Recursively releases the children arrays below a TNC_Content.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Frees memory; tmodel itself is left for the caller to free.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static void
+TncFreeTncModel (
+    TNC_Content *tmodel
+)
+{
+    unsigned int childIdx;
+
+    /* No children array: nothing below this node to release. */
+    if (!tmodel->children) return;
+    for (childIdx = 0; childIdx < tmodel->numchildren; childIdx++) {
+        TncFreeTncModel (tmodel->children + childIdx);
+    }
+    FREE ((char *) tmodel->children);
+}
+
+/*
+ * TncFreeValidationData --
+ *
+ *      Releases the DTD validation data hanging off a domReadInfo.
+ *      Does nothing if validation was not enabled for this parse.
+ */
+static void
+TncFreeValidationData (
+    domReadInfo *info
+    )
+{
+    Tcl_HashEntry *entry, *attEntry;
+    Tcl_HashSearch iter, attIter;
+    TNC_Content *model;
+    TNC_ElemAttInfo *elemAttInfo;
+    TNC_EntityInfo *entityInfo;
+    TNC_AttDecl *attDecl;
+
+    if (!info->dtdvalidation) return;
+
+    /* Content models: once rewritten they are TNC_Content trees, before
+       that they are handed back to expat via XML_FreeContentModel. */
+    for (entry = Tcl_FirstHashEntry (info->tagNames, &iter); entry;
+         entry = Tcl_NextHashEntry (&iter)) {
+        model = Tcl_GetHashValue (entry);
+        if (!info->elemContentsRewriten) {
+            XML_FreeContentModel(info->parser, (XML_Content *)model);
+        } else if (model) {
+            TncFreeTncModel (model);
+            FREE ((char *) model);
+        }
+    }
+    Tcl_DeleteHashTable (info->tagNames);
+    /* Per-element attribute tables and the declarations stored in them. */
+    for (entry = Tcl_FirstHashEntry (info->attDefsTables, &iter); entry;
+         entry = Tcl_NextHashEntry (&iter)) {
+        elemAttInfo = Tcl_GetHashValue (entry);
+        if (!elemAttInfo) continue;
+        for (attEntry = Tcl_FirstHashEntry (elemAttInfo->attributes, &attIter);
+             attEntry; attEntry = Tcl_NextHashEntry (&attIter)) {
+            attDecl = Tcl_GetHashValue (attEntry);
+            if (!attDecl) continue;
+            if (attDecl->att_type == TNC_ATTTYPE_NOTATION ||
+                attDecl->att_type == TNC_ATTTYPE_ENUMERATION) {
+                Tcl_DeleteHashTable (attDecl->lookupTable);
+                FREE ((char *) attDecl->lookupTable);
+            }
+            if (attDecl->dflt) {
+                FREE (attDecl->dflt);
+            }
+            FREE ((char *) attDecl);
+        }
+        Tcl_DeleteHashTable (elemAttInfo->attributes);
+        FREE ((char *) elemAttInfo->attributes);
+        FREE ((char *) elemAttInfo);
+    }
+    Tcl_DeleteHashTable (info->attDefsTables);
+    /* Entity infos; entries may carry no value at all. */
+    for (entry = Tcl_FirstHashEntry (info->entityDecls, &iter); entry;
+         entry = Tcl_NextHashEntry (&iter)) {
+        entityInfo = Tcl_GetHashValue (entry);
+        if (!entityInfo) continue;
+        if (entityInfo->is_notation) {
+            FREE (entityInfo->notationName);
+        }
+        FREE ((char *) entityInfo);
+    }
+    Tcl_DeleteHashTable (info->entityDecls);
+    Tcl_DeleteHashTable (info->notationDecls);
+    Tcl_DeleteHashTable (info->ids);
+    if (info->doctypeName) {
+        FREE (info->doctypeName);
+    }
+    /* Finally the table structures themselves and the content stack. */
+    FREE ((char *) info->tagNames);
+    FREE ((char *) info->attDefsTables);
+    FREE ((char *) info->entityDecls);
+    FREE ((char *) info->notationDecls);
+    FREE ((char *) info->ids);
+    FREE ((char *) info->contentStack);
+}
+#endif
/*---------------------------------------------------------------------------
| domReadDocument
|
\--------------------------------------------------------------------------*/
@@ -2141,10 +4101,13 @@
Tcl_Channel channel,
const char *baseurl,
Tcl_Obj *extResolver,
int useForeignDTD,
int paramEntityParsing,
+#ifndef TDOM_NO_DTD_VALIDATION
+ int dtdvalidation,
+#endif
Tcl_Interp *interp,
int *resultcode
)
{
int done, tclLen;
@@ -2187,11 +4150,39 @@
info.baseURIstackSize = INITIAL_BASEURISTACK_SIZE;
info.baseURIstack = (domActiveBaseURI*)
MALLOC (sizeof(domActiveBaseURI) * info.baseURIstackSize);
info.insideDTD = 0;
info.status = 0;
-
+#ifndef TDOM_NO_DTD_VALIDATION
+ info.dtdvalidation = dtdvalidation;
+
+ if (dtdvalidation) {
+ info.tagNames = (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable));
+ Tcl_InitHashTable (info.tagNames, TCL_STRING_KEYS);
+ info.elemContentsRewriten = 0;
+ info.dtdstatus = 0;
+ info.idCheck = 1;
+ info.attDefsTables =
+ (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable));
+ Tcl_InitHashTable (info.attDefsTables, TCL_STRING_KEYS);
+ info.entityDecls =
+ (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable));
+ Tcl_InitHashTable (info.entityDecls, TCL_STRING_KEYS);
+ info.notationDecls =
+ (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable));
+ Tcl_InitHashTable (info.notationDecls, TCL_STRING_KEYS);
+ info.ids = (Tcl_HashTable *) MALLOC (sizeof (Tcl_HashTable));
+ Tcl_InitHashTable (info.ids, TCL_STRING_KEYS);
+ info.doctypeName = NULL;
+ info.skipWhiteCDATAs = 1;
+ info.ignorePCDATA = 0;
+ info.contentStack = (TNC_ContentStack *)
+ MALLOC (sizeof (TNC_ContentStack) * TNC_INITCONTENTSTACKSIZE);
+ info.contentStackSize = TNC_INITCONTENTSTACKSIZE;
+ info.contentStackPtr = 0;
+ }
+#endif
XML_SetUserData(parser, &info);
XML_SetBase (parser, baseurl);
/* We must use XML_GetBase(), because XML_SetBase copies the baseURI,
and we want to compare the pointers */
info.baseURIstack[0].baseURI = XML_GetBase (parser);
@@ -2210,17 +4201,22 @@
XML_SetDoctypeDeclHandler (parser, startDoctypeDeclHandler,
endDoctypeDeclHandler);
if (keepCDATA) {
XML_SetCdataSectionHandler(parser, startCDATA, endCDATA);
}
-
-
+#ifndef TDOM_NO_DTD_VALIDATION
+ if (dtdvalidation) {
+ XML_SetElementDeclHandler (parser, TncElementDeclCommand);
+ XML_SetAttlistDeclHandler (parser, TncAttDeclCommand);
+ XML_SetNotationDeclHandler (parser, TncNotationDeclHandler);
+ }
+#endif
if (channel == NULL) {
status = XML_Parse(parser, xml, length, 1);
switch (status) {
case XML_STATUS_SUSPENDED:
- DBG(fprintf(stderr, "XML_STATUS_SUSPENDED\n");)
+ DBG(fprintf(stderr, "XML_STATUS_SUSPENDED\n"));
if (info.status == TCL_BREAK) {
Tcl_ResetResult(interp);
}
/* fall throu */
case XML_STATUS_ERROR:
@@ -2227,10 +4223,13 @@
DBG(fprintf(stderr, "XML_STATUS_ERROR\n");)
FREE ( info.activeNS );
FREE ( info.baseURIstack );
Tcl_DStringFree (info.cdata);
FREE ( info.cdata);
+#ifndef TDOM_NO_DTD_VALIDATION
+ if (info.dtdvalidation) TncFreeValidationData (&info);
+#endif
domFreeDocument (doc, NULL, NULL);
*resultcode = info.status;
return NULL;
case XML_STATUS_OK:
break;
@@ -2240,10 +4239,13 @@
if (Tcl_GetChannelOption (interp, channel, "-encoding", &dStr) != TCL_OK) {
FREE ( (char*) info.activeNS );
FREE ( info.baseURIstack );
Tcl_DStringFree (info.cdata);
FREE ( info.cdata);
+#ifndef TDOM_NO_DTD_VALIDATION
+ if (info.dtdvalidation) TncFreeValidationData (&info);
+#endif
domFreeDocument (doc, NULL, NULL);
*resultcode = info.status;
return NULL;
}
if (strcmp (Tcl_DStringValue (&dStr), "utf-8")==0 ) useBinary = 1;
@@ -2265,10 +4267,13 @@
DBG(fprintf(stderr, "XML_STATUS_ERROR\n");)
FREE ( info.activeNS );
FREE ( info.baseURIstack );
Tcl_DStringFree (info.cdata);
FREE ( info.cdata);
+#ifndef TDOM_NO_DTD_VALIDATION
+ if (info.dtdvalidation) TncFreeValidationData (&info);
+#endif
domFreeDocument (doc, NULL, NULL);
*resultcode = info.status;
return NULL;
case XML_STATUS_OK:
break;
@@ -2293,10 +4298,13 @@
DBG(fprintf(stderr, "XML_STATUS_ERROR\n");)
FREE ( info.activeNS );
FREE ( info.baseURIstack );
Tcl_DStringFree (info.cdata);
FREE ( info.cdata);
+#ifndef TDOM_NO_DTD_VALIDATION
+ if (info.dtdvalidation) TncFreeValidationData (&info);
+#endif
domFreeDocument (doc, NULL, NULL);
Tcl_DecrRefCount (bufObj);
*resultcode = info.status;
return NULL;
case XML_STATUS_OK:
@@ -2308,11 +4316,14 @@
}
FREE ( info.activeNS );
FREE ( info.baseURIstack );
Tcl_DStringFree (info.cdata);
FREE ( info.cdata);
-
+#ifndef TDOM_NO_DTD_VALIDATION
+ if (info.dtdvalidation) TncFreeValidationData (&info);
+#endif
+
domSetDocumentElement (doc);
return doc;
}
@@ -5169,10 +7180,13 @@
domActiveNS *activeNS;
int baseURIstackSize;
int baseURIstackPos;
domActiveBaseURI *baseURIstack;
int insideDTD;
+#ifndef TDOM_NO_DTD_VALIDATION
+ int dtdvalidation;
+#endif
/* Now the tdom cmd specific elements */
int tdomStatus;
Tcl_Obj *extResolver;
} tdomCmdReadInfo;
@@ -5241,10 +7255,13 @@
Tcl_DStringSetLength (info->cdata, 0);
info->nextFeedbackPosition = info->feedbackAfter;
info->interp = interp;
info->activeNSpos = -1;
info->insideDTD = 0;
+#ifndef TDOM_NO_DTD_VALIDATION
+ info->dtdvalidation = 0;
+#endif
info->baseURIstackPos = 0;
info->tdomStatus = 0;
}
@@ -5378,10 +7395,13 @@
info->baseURIstackPos = 0;
info->baseURIstackSize = INITIAL_BASEURISTACK_SIZE;
info->baseURIstack = (domActiveBaseURI*)
MALLOC (sizeof(domActiveBaseURI) * info->baseURIstackSize);
info->insideDTD = 0;
+#ifndef TDOM_NO_DTD_VALIDATION
+ info->dtdvalidation = 0;
+#endif
info->tdomStatus = 0;
info->extResolver = NULL;
handlerSet->userData = info;
Index: generic/dom.h
==================================================================
--- generic/dom.h
+++ generic/dom.h
@@ -725,10 +725,13 @@
Tcl_Channel channel,
const char *baseurl,
Tcl_Obj *extResolver,
int useForeignDTD,
int paramEntityParsing,
+#ifndef TDOM_NO_DTD_VALIDATION
+ int dtdvalidation,
+#endif
Tcl_Interp *interp,
int *status);
void domFreeDocument (domDocument *doc,
domFreeCallback freeCB,
Index: generic/domxslt.c
==================================================================
--- generic/domxslt.c
+++ generic/domxslt.c
@@ -5790,11 +5790,15 @@
}
/* keep white space, no fiddling with the encoding (is this
a good idea?) */
doc = domReadDocument (parser, xmlstring, len, 0, 0, storeLineColumn,
0, 0, NULL, chan, extbase, extResolver, 0,
- (int) XML_PARAM_ENTITY_PARSING_ALWAYS, interp,
+ (int) XML_PARAM_ENTITY_PARSING_ALWAYS,
+#ifndef TDOM_NO_DTD_VALIDATION
+ 0,
+#endif
+ interp,
&resultcode);
if (xsltDoc->extResolver) {
Tcl_DecrRefCount (extResolver);
}
if (doc == NULL) {
Index: generic/tcldom.c
==================================================================
--- generic/tcldom.c
+++ generic/tcldom.c
@@ -1223,10 +1223,13 @@
NULL,
NULL,
extResolver,
0,
(int) XML_PARAM_ENTITY_PARSING_ALWAYS,
+#ifndef TDOM_NO_DTD_VALIDATION
+ 0,
+#endif
interp,
&resultcode);
if (extResolver) {
Tcl_DecrRefCount(extResolver);
}
@@ -6157,10 +6160,13 @@
int ignorexmlns = 0;
int feedbackAfter = 0;
int useForeignDTD = 0;
int paramEntityParsing = (int)XML_PARAM_ENTITY_PARSING_ALWAYS;
int keepCDATA = 0;
+#ifndef TDOM_NO_DTD_VALIDATION
+ int dtdvalidation = 0;
+#endif
int status = 0;
domDocument *doc;
Tcl_Obj *newObjName = NULL;
XML_Parser parser;
Tcl_Channel chan = (Tcl_Channel) NULL;
@@ -6173,11 +6179,15 @@
"-feedbackcmd", "-json", "-jsonroot",
#ifdef TDOM_HAVE_GUMBO
"-html5",
#endif
"-jsonmaxnesting", "-ignorexmlns", "--",
- "-keepCDATA", NULL
+ "-keepCDATA",
+#ifndef TDOM_NO_DTD_VALIDATION
+ "-dtdvalidation",
+#endif
+ NULL
};
enum parseOption {
o_keepEmpties, o_simple, o_html,
o_feedbackAfter, o_channel, o_baseurl,
o_externalentitycommand, o_useForeignDTD, o_paramentityparsing,
@@ -6185,10 +6195,13 @@
#ifdef TDOM_HAVE_GUMBO
o_htmlfive,
#endif
o_jsonmaxnesting, o_ignorexmlns, o_LAST,
o_keepCDATA
+#ifndef TDOM_NO_DTD_VALIDATION
+ ,o_dtdvalidation
+#endif
};
static const char *paramEntityParsingValues[] = {
"always",
"never",
@@ -6410,10 +6423,16 @@
objv++; objc--; break;
case o_keepCDATA:
keepCDATA = 1;
objv++; objc--; break;
+
+#ifndef TDOM_NO_DTD_VALIDATION
+ case o_dtdvalidation:
+ dtdvalidation = 1;
+ objv++; objc--; break;
+#endif
}
if ((enum parseOption) optionIndex == o_LAST) break;
}
@@ -6569,10 +6588,13 @@
chan,
baseURI,
extResolver,
useForeignDTD,
paramEntityParsing,
+#ifndef TDOM_NO_DTD_VALIDATION
+ dtdvalidation,
+#endif
interp,
&status);
if (doc == NULL) {
char s[50];
long byteIndex, i;
@@ -6660,18 +6682,19 @@
static const char *features[] = {
"expatversion", "expatmajorversion", "expatminorversion",
"expatmicroversion", "dtd", "ns",
"unknown", "tdomalloc", "lessns",
"html5", "jsonmaxnesting", "versionhash",
- "pullparser", "TCL_UTF_MAX", NULL
+ "pullparser", "TCL_UTF_MAX", "dtdvalidation",
+ NULL
};
enum feature {
o_expatversion, o_expatmajorversion, o_expatminorversion,
o_expatmicroversion, o_dtd, o_ns,
o_unknown, o_tdomalloc, o_lessns,
o_html5, o_jsonmaxnesting, o_versionhash,
- o_pullparser, o_TCL_UTF_MAX,
+ o_pullparser, o_TCL_UTF_MAX, o_dtdvalidation
};
if (Tcl_GetIndexFromObj(interp, objv[1], features, "feature", 0,
&featureIndex) != TCL_OK) {
return TCL_ERROR;
@@ -6754,10 +6777,18 @@
SetBooleanResult(result);
break;
case o_TCL_UTF_MAX:
SetIntResult(TCL_UTF_MAX);
break;
+ case o_dtdvalidation:
+#ifndef TDOM_NO_DTD_VALIDATION
+ result = 1;
+#else
+ result = 0;
+#endif
+ SetBooleanResult(result);
+ break;
}
return TCL_OK;
}
/*----------------------------------------------------------------------------
Index: tdom.m4
==================================================================
--- tdom.m4
+++ tdom.m4
@@ -263,10 +263,51 @@
fi
else
AC_MSG_RESULT([no])
fi
])
+
+#------------------------------------------------------------------------
+# TDOM_ENABLE_DTD_VALIDATION --
+#
+#   Building with validation features.
+#
+# Arguments:
+#   None
+#
+# Results:
+#
+#   Adds the following arguments to configure:
+#       --enable-dtdvalidation=yes|no
+#
+#   Defines the following vars:
+#       TDOM_NO_DTD_VALIDATION (only with --enable-dtdvalidation=no)
+#   Sets the following vars:
+#
+#------------------------------------------------------------------------
+
+AC_DEFUN(TDOM_ENABLE_DTD_VALIDATION, [
+    AC_MSG_CHECKING([whether to enable dtd validation])
+    AC_ARG_ENABLE(dtdvalidation,
+        AC_HELP_STRING([--enable-dtdvalidation],
+            [build with validation features (default: on)]),
+        [tcl_ok=$enableval], [tcl_ok=yes])
+
+    if test "${enable_dtdvalidation+set}" = set; then
+        enableval="$enable_dtdvalidation"
+        tcl_ok=$enableval
+    else
+        tcl_ok=yes
+    fi
+
+    if test "$tcl_ok" = "no" ; then
+        AC_MSG_RESULT([no])
+        AC_DEFINE(TDOM_NO_DTD_VALIDATION)
+    else
+        AC_MSG_RESULT([yes])
+    fi
+])
#------------------------------------------------------------------------
# TDOM_PATH_AOLSERVER
#
# Allows the building with support for AOLserver
Index: tests/data/REC-xslt-19991116.xml
==================================================================
--- tests/data/REC-xslt-19991116.xml
+++ tests/data/REC-xslt-19991116.xml
@@ -6352,15 +6352,11 @@
a way to group together consecutive nodes having duplicate
subelements or attributes;
features to make handling of the HTML style
attribute more convenient.
-
-features to make handling of the HTML style
-attribute more convenient.
-
Index: tests/dom.test
==================================================================
--- tests/dom.test
+++ tests/dom.test
@@ -17,10 +17,12 @@
# dom-12.*: -feedbackAfter
#
# Copyright (c) 2002, 2003, 2004 Rolf Ade.
source [file join [file dir [info script]] loadtdom.tcl]
+
+testConstraint dtdvalidation [dom featureinfo dtdvalidation]
test dom-1.1 {createDocument with root node name not a XML Name} {
list [catch {dom createDocument "root node"} msg] $msg
} "1 {Invalid root element name 'root node'}"
@@ -670,16 +672,66 @@
set result [$doc selectNodes count(doc/e/node())]
$doc delete
set result
} 1
-test dom-2.42 {namespaces} {
+# Serves the bundled xmlspec DTD; "--" stops the "-//..." id being taken as a switch option.
+proc dom-2.42-resolver {base systemId publicId} {
+    switch -- $publicId {
+        "-//W3C//DTD Specification V2.0//EN" {
+            set fd [open [file join [file dir [info script]] data/xmlspec-v20.dtd]]
+            set xmlspec [read $fd]
+            close $fd
+            return [list "string" "" $xmlspec]
+        }
+        default {
+            puts stderr "Unexpected systemId '$systemId'"
+            return ""
+        }
+    }
+}
+
+test dom-2.42 {-dtdvalidation} {dtdvalidation} {
+    set file [file join [file dir [info script]] data/REC-xslt-19991116.xml]
+    set fd [open $file]
+    set doc [dom parse -dtdvalidation -channel $fd \
+                 -baseurl file://$file \
+                 -externalentitycommand dom-2.42-resolver]
+    close $fd ;# release the channel opened above; parsing has finished
+    $doc delete
+} {}
+
+test dom-2.43 {-dtdvalidation / invalid dtd} {dtdvalidation} {
+ set result [catch {
+ dom parse -dtdvalidation {]>
+ }} errMsg]
+ set result
+} 1
+
+test dom-2.44 {namespaces} {
set doc [dom parse {
notes
}]
$doc delete
} {}
+
+test dom-2.45 {-dtdvalidation / no dtd} {dtdvalidation} {
+ set result [catch {
+ #dom parse -dtdvalidation
+ dom parse -dtdvalidation
+ } errMsg]
+ set result
+} 1
+
+test dom-2.46 {Unknown empty element} {dtdvalidation} {
+ set result [catch {
+ dom parse -dtdvalidation {]>
+ }} errMsg]
+ set result
+} 1
test dom-3.1 {isName} {
dom isName ":foo"
} {1}