Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Save work. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | wip |
Files: | files | file ages | folders |
SHA3-256: |
3b5fdf0979b152a63a233e3d0edf439f |
User & Date: | rolf 2020-03-27 00:42:53.646 |
Context
2020-03-28
| ||
01:28 | Fixed info definedElements and info definedElementtypes. Added info definedPatterns and info patterndefinition. check-in: 1711ee96ee user: rolf tags: wip | |
2020-03-27
| ||
00:54 | Merged the work done so far: the recovering script may now choose the recovery strategy by its return value in case of MISSING_ELEMENT on an element start event. Besides the default ("ignore the element and the rest of the current parent element's content and continue with the parent's sibling") there is now "ignore", advising the validation engine to pretend the missing mandatory element had matched and to continue looking for the element start event from there, and "vanish", advising the validation engine to skip the element as if it had not been in the input XML. In case of UNEXPECTED_ELEMENT on an element start event the result "vanish" advises the validation engine to skip the element (which allows further unexpected elements in the source to be seen). check-in: f90aa277b4 user: rolf tags: schema | |
00:42 | Save work. check-in: 3b5fdf0979 user: rolf tags: wip | |
2020-03-25
| ||
20:17 | Save work. check-in: 0e666eae10 user: rolf tags: wip | |
Changes
Changes to generic/schema.c.
︙ | ︙ | |||
362 363 364 365 366 367 368 | hm = se->hasMatched; \ if (hm && maxOne (cp->quants[ac])) { \ ac += + 1; \ hm = 0; \ } \ | | | | | | 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 | hm = se->hasMatched; \ if (hm && maxOne (cp->quants[ac])) { \ ac += + 1; \ hm = 0; \ } \ #define updateStack(sdata,se,ac) \ if (!(sdata->recoverFlags & RECOVER_FLAG_REWIND)) { \ se->activeChild = ac; \ se->hasMatched = 1; \ } \ static SchemaCP* initSchemaCP ( Schema_CP_Type type, void *namespace, char *name ) |
︙ | ︙ | |||
1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 | return 0; } switch (errorType) { case MISSING_ELEMENT_MATCH_START: if (strcmp (Tcl_GetStringResult (interp), "ignore") == 0) { sdata->recoverFlags |= RECOVER_FLAG_IGNORE; return 1; } else { /* Rewind stack to last match and ignore the just opened * Element. */ finalizeElement (sdata, ac+1); sdata->skipDeep = 2; } break; case UNEXPECTED_ELEMENT: | > > > > > > > > > | | > | 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 | return 0; } switch (errorType) { case MISSING_ELEMENT_MATCH_START: if (strcmp (Tcl_GetStringResult (interp), "ignore") == 0) { sdata->recoverFlags |= RECOVER_FLAG_IGNORE; return 1; } else if (strcmp (Tcl_GetStringResult (interp), "vanish") == 0) { sdata->recoverFlags |= RECOVER_FLAG_REWIND; sdata->skipDeep = 1; return 1; } else { /* Rewind stack to last match and ignore the just opened * Element. */ finalizeElement (sdata, ac+1); sdata->skipDeep = 2; } break; case UNEXPECTED_ELEMENT: if (strcmp (Tcl_GetStringResult (interp), "vanish") == 0) { sdata->recoverFlags |= RECOVER_FLAG_REWIND; sdata->skipDeep = 1; return 1; } else { finalizeElement (sdata, ac+1); sdata->skipDeep = 2; } break; case UNEXPECTED_TEXT: sdata->recoverFlags |= RECOVER_FLAG_REWIND; break; case MISSING_ELEMENT_MATCH_END: case MISSING_TEXT_MATCH_END: sdata->recoverFlags |= RECOVER_FLAG_DONT_REPORT; |
︙ | ︙ | |||
1278 1279 1280 1281 1282 1283 1284 | break; case SCHEMA_CTYPE_ANY: if (candidate->namespace && candidate->namespace != namespace) { break; } | | | | | | | | 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 | break; case SCHEMA_CTYPE_ANY: if (candidate->namespace && candidate->namespace != namespace) { break; } updateStack (sdata, se, ac); sdata->skipDeep = 1; /* See comment in probeElement: sdata->vname and * sdata->vns may be pre-filled. We reset it here.*/ sdata->vname = NULL; sdata->vns = NULL; return 1; case SCHEMA_CTYPE_NAME: DBG(fprintf (stderr, "name: %s ns: %s candidate name: %s " "candidate ns: %s\n", name, namespace, candidate->name, candidate->namespace)); if (candidate->name == name && candidate->namespace == namespace) { pushToStack (sdata, candidate); updateStack (sdata, se, ac); return 1; } break; case SCHEMA_CTYPE_CHOICE: if (candidate->typedata) { h = Tcl_FindHashEntry ((Tcl_HashTable *)candidate->typedata, name); if (h) { icp = Tcl_GetHashValue (h); if (icp->namespace == namespace) { pushToStack (sdata, icp); updateStack (sdata, se, ac); return 1; } } /* TODO: Short-cut in case of no match (looking * for emtpy match, recovering). For now fall * throu to simple, serial approach. */ } for (i = 0; i < candidate->nc; i++) { icp = candidate->content[i]; switch (icp->type) { case SCHEMA_CTYPE_TEXT: break; case SCHEMA_CTYPE_ANY: if (icp->namespace && icp->namespace != namespace) { break; } updateStack (sdata, se, ac); sdata->skipDeep = 1; /* See comment in probeElement: sdata->vname * and sdata->vns may be pre-filled. 
We reset it * here.*/ sdata->vname = NULL; sdata->vns = NULL; return 1; case SCHEMA_CTYPE_NAME: if (icp->name == name && icp->namespace == namespace) { pushToStack (sdata, icp); updateStack (sdata, se, ac); return 1; } break; case SCHEMA_CTYPE_CHOICE: Tcl_Panic ("MIXED or CHOICE child of MIXED or CHOICE"); case SCHEMA_CTYPE_INTERLEAVE: case SCHEMA_CTYPE_PATTERN: pushToStack (sdata, icp); rc = matchElementStart (interp, sdata, name, namespace); if (rc == 1) { updateStack (sdata, se, ac); return 1; } popStack (sdata); if (rc == -1) mayskip = 1; break; case SCHEMA_CTYPE_VIRTUAL: |
︙ | ︙ | |||
1382 1383 1384 1385 1386 1387 1388 | else return 0; case SCHEMA_CTYPE_INTERLEAVE: case SCHEMA_CTYPE_PATTERN: pushToStack (sdata, candidate); rc = matchElementStart (interp, sdata, name, namespace); if (rc == 1) { | | | 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 | else return 0; case SCHEMA_CTYPE_INTERLEAVE: case SCHEMA_CTYPE_PATTERN: pushToStack (sdata, candidate); rc = matchElementStart (interp, sdata, name, namespace); if (rc == 1) { updateStack (sdata, se, ac); return 1; } popStack (sdata); if (rc == -1) mayskip = 1; break; case SCHEMA_CTYPE_KEYSPACE_END: |
︙ | ︙ | |||
1425 1426 1427 1428 1429 1430 1431 | } if (!mayskip && mustMatch (cp->quants[ac], hm)) { if (recover (interp, sdata, MISSING_ELEMENT_MATCH_START, name, namespace, NULL, ac)) { if (sdata->recoverFlags & RECOVER_FLAG_IGNORE) { /* We pretend the ac content particel had * matched. */ | | < < | 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 | } if (!mayskip && mustMatch (cp->quants[ac], hm)) { if (recover (interp, sdata, MISSING_ELEMENT_MATCH_START, name, namespace, NULL, ac)) { if (sdata->recoverFlags & RECOVER_FLAG_IGNORE) { /* We pretend the ac content particel had * matched. */ updateStack (sdata, se, ac); } return 1; } return 0; } ac++; hm = 0; } if (isName) { if (recover (interp, sdata, UNEXPECTED_ELEMENT, name, namespace, NULL, 0)) { return 1; } return 0; } return -1; case SCHEMA_CTYPE_KEYSPACE: |
︙ | ︙ | |||
1477 1478 1479 1480 1481 1482 1483 | break; case SCHEMA_CTYPE_ANY: if (icp->namespace && icp->namespace == namespace) { break; } sdata->skipDeep = 1; | < | 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 | break; case SCHEMA_CTYPE_ANY: if (icp->namespace && icp->namespace == namespace) { break; } sdata->skipDeep = 1; se->hasMatched = 1; se->interleaveState[i] = 1; /* See comment in probeElement: sdata->vname and * sdata->vns may be pre-filled. We reset it here.*/ sdata->vname = NULL; sdata->vns = NULL; return 1; |
︙ | ︙ | |||
1504 1505 1506 1507 1508 1509 1510 | Tcl_Panic ("MIXED or CHOICE child of INTERLEAVE"); case SCHEMA_CTYPE_INTERLEAVE: case SCHEMA_CTYPE_PATTERN: pushToStack (sdata, icp); rc = matchElementStart (interp, sdata, name, namespace); if (rc == 1) { | > | | > | 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 | Tcl_Panic ("MIXED or CHOICE child of INTERLEAVE"); case SCHEMA_CTYPE_INTERLEAVE: case SCHEMA_CTYPE_PATTERN: pushToStack (sdata, icp); rc = matchElementStart (interp, sdata, name, namespace); if (rc == 1) { if (!(sdata->recoverFlags & RECOVER_FLAG_REWIND)) { se->hasMatched = 1; se->interleaveState[i] = 1; } return 1; } popStack (sdata); if (mayskip && rc != -1) mayskip = 0; break; case SCHEMA_CTYPE_VIRTUAL: |
︙ | ︙ | |||
1687 1688 1689 1690 1691 1692 1693 | return TCL_ERROR; } pushToStack (sdata, pattern); return TCL_OK; } /* The normal case: we're inside the tree */ | | > > | > | < > | 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 | return TCL_ERROR; } pushToStack (sdata, pattern); return TCL_OK; } /* The normal case: we're inside the tree */ /* In case of recovering and if the user wants a required cp to be * treated as matched (or in other words: that the validation * engine should ignore the mandatory state of the cp) we unwind * the call stack to have updated stack elements, to be able to * pretend, we have seen the mandatory cp. Now try to match the * open element from this stack state. */ while (1) { rc = matchElementStart (interp, sdata, (char *) namePtr, namespacePtr); while (rc == -1) { popStack (sdata); rc = matchElementStart (interp, sdata, (char *) namePtr, namespacePtr); }; if (rc) { DBG( fprintf (stderr, "probeElement: element '%s' match\n", name); serializeStack (sdata); fprintf (stderr, "\n"); ); if (sdata->recoverFlags & RECOVER_FLAG_IGNORE) { sdata->recoverFlags &= ~RECOVER_FLAG_IGNORE; continue; } CHECK_REWIND; return TCL_OK; } break; } DBG( fprintf (stderr, "element '%s' DOESN'T match\n", name); serializeStack (sdata); |
︙ | ︙ | |||
2408 2409 2410 2411 2412 2413 2414 | /* Fall through */ case SCHEMA_CTYPE_PATTERN: while (ac < cp->nc) { candidate = cp->content[ac]; switch (candidate->type) { case SCHEMA_CTYPE_TEXT: if (checkText (interp, candidate, text)) { | | | | | | | 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 | /* Fall through */ case SCHEMA_CTYPE_PATTERN: while (ac < cp->nc) { candidate = cp->content[ac]; switch (candidate->type) { case SCHEMA_CTYPE_TEXT: if (checkText (interp, candidate, text)) { updateStack (sdata, se, ac); return 1; } if (sdata->evalError) return 0; if (recover (interp, sdata, INVALID_VALUE, NULL, NULL, text, ac)) { updateStack (sdata, se, ac); return 1; } SetResult ("Invalid text content"); return 0; case SCHEMA_CTYPE_CHOICE: if (candidate->flags & MIXED_CONTENT) { updateStack (sdata, se, ac); return 1; } for (i = 0; i < candidate->nc; i++) { ic = candidate->content[i]; switch (ic->type) { case SCHEMA_CTYPE_TEXT: if (checkText (interp, ic, text)) { updateStack (sdata, se, ac); return 1; } break; case SCHEMA_CTYPE_NAME: case SCHEMA_CTYPE_ANY: break; case SCHEMA_CTYPE_INTERLEAVE: case SCHEMA_CTYPE_PATTERN: pushToStack (sdata, ic); if (matchText (interp, sdata, text)) { updateStack (sdata, se, ac); return 1; } popStack (sdata); break; case SCHEMA_CTYPE_VIRTUAL: Tcl_Panic ("Virtual constrain in MIXED or" |
︙ | ︙ | |||
2478 2479 2480 2481 2482 2483 2484 | } break; case SCHEMA_CTYPE_INTERLEAVE: case SCHEMA_CTYPE_PATTERN: pushToStack (sdata, candidate); if (matchText (interp, sdata, text)) { | | | 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 | } break; case SCHEMA_CTYPE_INTERLEAVE: case SCHEMA_CTYPE_PATTERN: pushToStack (sdata, candidate); if (matchText (interp, sdata, text)) { updateStack (sdata, se, ac); return 1; } popStack (sdata); if (mustMatch (cp->quants[ac], hm)) { if (recover (interp, sdata, UNEXPECTED_TEXT, NULL, NULL, text, 0)) { return 1; |
︙ | ︙ | |||
2573 2574 2575 2576 2577 2578 2579 | } else { if (minOne (cp->quants[i])) mayskip = 0; } ic = cp->content[i]; switch (ic->type) { case SCHEMA_CTYPE_TEXT: if (checkText (interp, ic, text)) { | > | | > | | 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 | } else { if (minOne (cp->quants[i])) mayskip = 0; } ic = cp->content[i]; switch (ic->type) { case SCHEMA_CTYPE_TEXT: if (checkText (interp, ic, text)) { if (!(sdata->recoverFlags & RECOVER_FLAG_REWIND)) { se->hasMatched = 1; se->interleaveState[i] = 1; } return 1; } break; case SCHEMA_CTYPE_NAME: case SCHEMA_CTYPE_ANY: break; case SCHEMA_CTYPE_INTERLEAVE: case SCHEMA_CTYPE_PATTERN: pushToStack (sdata, ic); if (matchText (interp, sdata, text)) { updateStack (sdata, se, ac); return 1; } popStack (sdata); break; case SCHEMA_CTYPE_CHOICE: Tcl_Panic ("MIXED or CHOICE child of INTERLEAVE"); |
︙ | ︙ |
Changes to tests/schema.test.
︙ | ︙ | |||
7278 7279 7280 7281 7282 7283 7284 | if {$errorInfo eq "MISSING_ELEMENT"} { return ignore } } else { lappend fromReportCmd "END_EVENT expecting [$scmd info expected]" } } | | | 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 | if {$errorInfo eq "MISSING_ELEMENT"} { return ignore } } else { lappend fromReportCmd "END_EVENT expecting [$scmd info expected]" } } test schema-17.22 {return "ignore" from recover script for MISSING_ELEMENT_MATCH_START} { set defs { { element a element b element c } } |
︙ | ︙ | |||
7314 7315 7316 7317 7318 7319 7320 | lappend result {*}$fromReportCmd incr xmlnr } s delete incr defnr } set result | > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 | lappend result {*}$fromReportCmd incr xmlnr } s delete incr defnr } set result } {0/0: 1 {END_EVENT expecting a} 0/1: 1 {END_EVENT expecting b} 0/2: 1 {matching b} {expecting a} {END_EVENT expecting c} 0/3: 1 {matching c} {expecting a} {matching c} {expecting b} 0/4: 1 {END_EVENT expecting c} 0/5: 1 {matching c} {expecting b} 0/6: 1 {matching b} {expecting a} 0/7: 1 {matching unknown} {expecting a} {matching unknown} {expecting b} {matching unknown} {expecting c} {matching unknown} {expecting {<elementend> {}}} 0/8: 1 {matching unknown} {expecting b} {matching unknown} {expecting c} {matching unknown} {expecting {<elementend> {}}}} proc schema-17.23 {scmd errorInfo} { global fromReportCmd if {[$scmd info vaction] eq "MATCH_ELEMENT_START"} { lappend fromReportCmd "matching [$scmd info vaction name]" "expecting [lsort [$scmd info expected]]" if {$errorInfo in {"MISSING_ELEMENT" "UNEXPECTED_ELEMENT"}} { return vanish } } else { lappend fromReportCmd "END_EVENT expecting [$scmd info expected]" } } test schema-17.23 {return "vanish" from recover handler} { set def { defelement doc { element a ref b element d ? 
} defpattern b { element b ref c } defpattern c { element c + } } set xmlinput { <doc/> <doc><a/></doc> <doc><b/></doc> <doc><c/></doc> <doc><a/><b/></doc> <doc><a/><c/></doc> <doc><b/><c/></doc> <doc><unknown/></doc> <doc><a/><unknown/></doc> <doc><a/><unknown/><b/><c/></doc> <doc><a/><unknown/><b/><c/><d/></doc> <doc><a/><b/><unknown/><c/></doc> <doc><a/><b/><unknown/><c/><d/></doc> <doc><a/><b/><c/><unknown/></doc> <doc><a/><b/><c/><unknown/><d/></doc> <doc><a/><b/><unknown/><c/><unknown1/><c/><d/><unknown2/><unknown3/></doc> } set result [list] tdom::schema s s define $def s reportcmd schema-17.23 set xmlnr 0 foreach xml $xmlinput { set fromReportCmd "" lappend result $xmlnr: [s validate $xml errMsg] lappend result {*}$fromReportCmd incr xmlnr } s delete set result } {0: 1 {END_EVENT expecting a} 1: 1 {END_EVENT expecting b} 2: 1 {matching b} {expecting a} {END_EVENT expecting a} 3: 1 {matching c} {expecting a} {END_EVENT expecting a} 4: 1 {END_EVENT expecting c} 5: 1 {matching c} {expecting b} {END_EVENT expecting b} 6: 1 {matching b} {expecting a} {matching c} {expecting a} {END_EVENT expecting a} 7: 1 {matching unknown} {expecting a} {END_EVENT expecting a} 8: 1 {matching unknown} {expecting b} {END_EVENT expecting b} 9: 1 {matching unknown} {expecting b} 10: 1 {matching unknown} {expecting b} 11: 1 {matching unknown} {expecting c} 12: 1 {matching unknown} {expecting c} 13: 1 {matching unknown} {expecting {<elementend> {}} c d} 14: 1 {matching unknown} {expecting {<elementend> {}} c d} 15: 1 {matching unknown} {expecting c} {matching unknown1} {expecting {<elementend> {}} c d} {matching unknown2} {expecting {<elementend> {}}} {matching unknown3} {expecting {<elementend> {}}}} proc schema-18 {args} { lappend ::result {*}$args } test schema-18.1 {reportcmd} { tdom::schema s s define { |
︙ | ︙ |