tDOM

Check-in [3ae775c452]
Login

Check-in [3ae775c452]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Added a configurable threshold for the optimization.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | mixedoptimization
Files: files | file ages | folders
SHA3-256: 3ae775c45222d238b43dbda64bbb721b8614a74786a48856e6dae1247103f9cd
User & Date: rolf 2020-02-01 01:12:31.117
Context
2020-02-01
01:22
Added an optimization in case of serveral easily distinguished choices in a choice content particle. check-in: 19b4a5d0ea user: rolf tags: schema
01:12
Added a configurable threshold for the optimization. Closed-Leaf check-in: 3ae775c452 user: rolf tags: mixedoptimization
2020-01-27
23:48
Merged from schema. check-in: 01080e891e user: rolf tags: mixedoptimization
Changes
Unified Diff Ignore Whitespace Patch
Changes to generic/schema.c.
32
33
34
35
36
37
38



39
40
41
42
43
44
45

#ifdef _MSC_VER
#include <io.h>
#else
#include <unistd.h>
#endif




#ifndef TDOM_EXPAT_READ_SIZE
# define TDOM_EXPAT_READ_SIZE (1024*8)
#endif
#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#else







>
>
>







32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

#ifdef _MSC_VER
#include <io.h>
#else
#include <unistd.h>
#endif

#ifndef TDOM_CHOICE_HASH_THRESHOLD
# define TDOM_CHOICE_HASH_THRESHOLD 5
#endif
#ifndef TDOM_EXPAT_READ_SIZE
# define TDOM_EXPAT_READ_SIZE (1024*8)
#endif
#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#else
589
590
591
592
593
594
595

596
597
598
599
600
601
602
    Tcl_IncrRefCount (sdata->textStub[2]);
    sdata->cdata = TMALLOC (Tcl_DString);
    Tcl_DStringInit (sdata->cdata);
    Tcl_InitHashTable (&sdata->ids, TCL_STRING_KEYS);
    sdata->unknownIDrefs = 0;
    Tcl_InitHashTable (&sdata->idTables, TCL_STRING_KEYS);
    Tcl_InitHashTable (&sdata->keySpaces, TCL_STRING_KEYS);

    return sdata;
}

static void schemaInstanceDelete (
    ClientData clientData
    )
{







>







592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
    Tcl_IncrRefCount (sdata->textStub[2]);
    sdata->cdata = TMALLOC (Tcl_DString);
    Tcl_DStringInit (sdata->cdata);
    Tcl_InitHashTable (&sdata->ids, TCL_STRING_KEYS);
    sdata->unknownIDrefs = 0;
    Tcl_InitHashTable (&sdata->idTables, TCL_STRING_KEYS);
    Tcl_InitHashTable (&sdata->keySpaces, TCL_STRING_KEYS);
    sdata->choiceHashThreshold = TDOM_CHOICE_HASH_THRESHOLD;
    return sdata;
}

static void schemaInstanceDelete (
    ClientData clientData
    )
{
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982

3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999








4000
4001
4002
4003
4004
4005
4006
    Tcl_Interp *interp,
    int objc,
    Tcl_Obj *const objv[]
    )
{
    int            methodIndex, keywordIndex, hnew, hnew1, patternIndex;
    int            result = TCL_OK, forwardDef = 0, i = 0, j, mode;
    int            savedDefineToplevel, type, len;
    unsigned int   savedNumPatternList;
    SchemaData    *savedsdata = NULL, *sdata = (SchemaData *) clientData;
    Tcl_HashTable *hashTable;
    Tcl_HashEntry *h, *h1;
    SchemaCP      *pattern, *thiscp, *current = NULL;
    void          *namespacePtr, *savedNamespacePtr;
    char          *xmlstr, *errMsg;
    domDocument   *doc;
    domNode       *node;
    Tcl_Obj       *attData;
    Tcl_Channel  chan = NULL;

    static const char *schemaInstanceMethods[] = {
        "defelement", "defpattern", "start",    "event",        "delete",
        "reset",      "define",     "validate", "domvalidate",  "deftext",
        "info",       "reportcmd",  "prefixns", "validatefile",
        "validatechannel",          "defelementtype", NULL

    };
    enum schemaInstanceMethod {
        m_defelement, m_defpattern, m_start,    m_event,        m_delete,
        m_reset,      m_define,     m_validate, m_domvalidate,  m_deftext,
        m_info,       m_reportcmd,  m_prefixns, m_validatefile,
        m_validatechannel,          m_defelementtype
    };

    static const char *eventKeywords[] = {
        "start", "end", "text", NULL
    };

    enum eventKeyword
    {
        k_elementstart, k_elementend, k_text
    };









    if (sdata == NULL) {
        /* Inline defined defelement, defelementtype, defpattern,
         * deftext, start or prefixns */
        sdata = GETASI;
        CHECK_SI;
        if (!sdata->defineToplevel && sdata->currentEvals > 1) {
            SetResult ("Method not allowed in nested schema define script");







|
















|
>





|





<





>
>
>
>
>
>
>
>







3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998

3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
    Tcl_Interp *interp,
    int objc,
    Tcl_Obj *const objv[]
    )
{
    int            methodIndex, keywordIndex, hnew, hnew1, patternIndex;
    int            result = TCL_OK, forwardDef = 0, i = 0, j, mode;
    int            savedDefineToplevel, type, len, n;
    unsigned int   savedNumPatternList;
    SchemaData    *savedsdata = NULL, *sdata = (SchemaData *) clientData;
    Tcl_HashTable *hashTable;
    Tcl_HashEntry *h, *h1;
    SchemaCP      *pattern, *thiscp, *current = NULL;
    void          *namespacePtr, *savedNamespacePtr;
    char          *xmlstr, *errMsg;
    domDocument   *doc;
    domNode       *node;
    Tcl_Obj       *attData;
    Tcl_Channel  chan = NULL;

    static const char *schemaInstanceMethods[] = {
        "defelement", "defpattern", "start",    "event",        "delete",
        "reset",      "define",     "validate", "domvalidate",  "deftext",
        "info",       "reportcmd",  "prefixns", "validatefile",
        "validatechannel",          "defelementtype",           "set",
        NULL
    };
    enum schemaInstanceMethod {
        m_defelement, m_defpattern, m_start,    m_event,        m_delete,
        m_reset,      m_define,     m_validate, m_domvalidate,  m_deftext,
        m_info,       m_reportcmd,  m_prefixns, m_validatefile,
        m_validatechannel,          m_defelementtype,           m_set
    };

    static const char *eventKeywords[] = {
        "start", "end", "text", NULL
    };

    enum eventKeyword
    {
        k_elementstart, k_elementend, k_text
    };

    static const char *setKeywords[] = {
        "choiceHashThreshold",  NULL
    };
    enum setKeyword
    {
        s_choiceHashThreshold
    };
        
    if (sdata == NULL) {
        /* Inline defined defelement, defelementtype, defpattern,
         * deftext, start or prefixns */
        sdata = GETASI;
        CHECK_SI;
        if (!sdata->defineToplevel && sdata->currentEvals > 1) {
            SetResult ("Method not allowed in nested schema define script");
4561
4562
4563
4564
4565
4566
4567



























4568
4569
4570
4571
4572
4573
4574
        sdata->defineToplevel = savedDefineToplevel;
        sdata->currentNamespace = savedNamespacePtr;
        if (!savedDefineToplevel) {
            SETASI(savedsdata);
        }
        break;
            



























    default:
        Tcl_SetResult (interp, "unknown method", NULL);
        result = TCL_ERROR;
        break;

    }
    if (sdata->cleanupAfterUse && sdata->currentEvals == 0







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
        sdata->defineToplevel = savedDefineToplevel;
        sdata->currentNamespace = savedNamespacePtr;
        if (!savedDefineToplevel) {
            SETASI(savedsdata);
        }
        break;
            
    case m_set:
        if (objc < 3 || objc > 4) {
            Tcl_WrongNumArgs (interp, 2, objv, "setting ?value?");
            return TCL_ERROR;
        }        
        if (Tcl_GetIndexFromObj (interp, objv[2], setKeywords,
                                 "setting", 0, &keywordIndex)
            != TCL_OK) {
            return TCL_ERROR;
        }
        switch ((enum setKeyword) keywordIndex) {
        case s_choiceHashThreshold:
            if (objc == 4) {
                if (Tcl_GetIntFromObj (interp, objv[3], &n) != TCL_OK) {
                    SetResult ("Invalid threshold value");
                    return TCL_ERROR;
                }
                if (n < 0) {
                    SetResult ("Invalid threshold value");
                    return TCL_ERROR;
                }
                sdata->choiceHashThreshold = n;
            }
            SetIntResult (sdata->choiceHashThreshold);
        }
        break;
        
    default:
        Tcl_SetResult (interp, "unknown method", NULL);
        result = TCL_ERROR;
        break;

    }
    if (sdata->cleanupAfterUse && sdata->currentEvals == 0
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
                    pattern->flags |= CONSTRAINT_TEXT_CHILD;
                    break;
                }
            }
        }
        if (pattern->type == SCHEMA_CTYPE_CHOICE
            && onlyName
            && pattern->nc > 4) {
            pattern->childs = TMALLOC (Tcl_HashTable);
            Tcl_InitHashTable (pattern->childs, TCL_ONE_WORD_KEYS);
            hnew = 1;
            for (i = 0; i < pattern->nc; i++) {
                if (pattern->content[i]->type != SCHEMA_CTYPE_NAME) {
                    continue;
                }
                h = Tcl_CreateHashEntry (pattern->childs,
                                         pattern->content[i]->name, &hnew);
                if (!hnew) {
                    break;
                }
                Tcl_SetHashValue (h, pattern->content[i]);
            }
            if (!hnew) {
                /* No simple lookup possible because of more than one
                 * elements with the same local name belong the
                 * choices. Rewind. */
                Tcl_DeleteHashTable (pattern->childs);
                FREE (pattern->childs);
                pattern->childs = NULL;
            }
        }
        addToContent (sdata, pattern, quant, n, m);







|
















|







4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
                    pattern->flags |= CONSTRAINT_TEXT_CHILD;
                    break;
                }
            }
        }
        if (pattern->type == SCHEMA_CTYPE_CHOICE
            && onlyName
            && pattern->nc > sdata->choiceHashThreshold) {
            pattern->childs = TMALLOC (Tcl_HashTable);
            Tcl_InitHashTable (pattern->childs, TCL_ONE_WORD_KEYS);
            hnew = 1;
            for (i = 0; i < pattern->nc; i++) {
                if (pattern->content[i]->type != SCHEMA_CTYPE_NAME) {
                    continue;
                }
                h = Tcl_CreateHashEntry (pattern->childs,
                                         pattern->content[i]->name, &hnew);
                if (!hnew) {
                    break;
                }
                Tcl_SetHashValue (h, pattern->content[i]);
            }
            if (!hnew) {
                /* No simple lookup possible because of more than one
                 * elements with the same local name belong to the
                 * choices. Rewind. */
                Tcl_DeleteHashTable (pattern->childs);
                FREE (pattern->childs);
                pattern->childs = NULL;
            }
        }
        addToContent (sdata, pattern, quant, n, m);
Changes to generic/schema.h.
187
188
189
190
191
192
193

194
195
196
197
198
199
200
    Tcl_DString *cdata;
    Tcl_HashTable ids;
    int unknownIDrefs;
    Tcl_HashTable idTables;
    Tcl_HashTable keySpaces;
    XML_Parser parser;
    domNode *node;

} SchemaData;

int 
schemaInstanceCmd (
    ClientData clientData,
    Tcl_Interp *interp,
    int objc,







>







187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
    Tcl_DString *cdata;
    Tcl_HashTable ids;
    int unknownIDrefs;
    Tcl_HashTable idTables;
    Tcl_HashTable keySpaces;
    XML_Parser parser;
    domNode *node;
    int choiceHashThreshold;
} SchemaData;

int 
schemaInstanceCmd (
    ClientData clientData,
    Tcl_Interp *interp,
    int objc,