Marpa

Check-in [5af0029e73]
Login

Check-in [5af0029e73]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Extended tracing, print symbol names in various places.
Timelines: family | ancestors | descendants | both | runtime-c
Files: files | file ages | folders
SHA1: 5af0029e73cb89ee1a455c16cb0c2be8dff0d3c1
User & Date: aku 2017-08-02 06:53:49.817
Context
2017-08-02
06:54
Pulled unicode CC fix (class :space:) into the RTC work check-in: f440b08f4b user: aku tags: runtime-c
06:53
Extended tracing, print symbol names in various places. check-in: 5af0029e73 user: aku tags: runtime-c
06:51
Tweak and extend tracing (print names of symbols accepted in gate) check-in: 17de50b73c user: aku tags: runtime-c
Changes
Unified Diff Ignore Whitespace Patch
Changes to rtc/lexer.c.
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
}

void
marpatcl_rtc_lexer_enter (marpatcl_rtc_p p, int ch)
{
    int res;
    TRACE_ENTER ("marpatcl_rtc_lexer_enter");
    TRACE (("rtc %p byte %d", p, ch));
    /* Contrary to the Tcl runtime, the C engine does not get multiple symbols,
     * only one: the current byte. This is because byte-ranges are coded as
     * rules in the grammar instead of as input symbols.
     */

    if (LEX.start == -1) {
	LEX.start = GATE.lastloc;







|







107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
}

void
marpatcl_rtc_lexer_enter (marpatcl_rtc_p p, int ch)
{
    int res;
    TRACE_ENTER ("marpatcl_rtc_lexer_enter");
    TRACE (("rtc %p byte %d @ %d", p, ch, GATE.lastloc));
    /* Contrary to the Tcl runtime, the C engine does not get multiple symbols,
     * only one: the current byte. This is because byte-ranges are coded as
     * rules in the grammar instead of as input symbols.
     */

    if (LEX.start == -1) {
	LEX.start = GATE.lastloc;
207
208
209
210
211
212
213


214
215
216
217
218
219
220
    
    /* And feed the results into the new lexer recce */
    n = marpatcl_rtc_symset_size (ACCEPT);
    if (n) {
	int k;
	buf = marpatcl_rtc_symset_dense (ACCEPT);
	for (k=0; k < n; k++) {


	    res = marpa_r_alternative (LEX.recce, TO_ACS (buf [k]), 1, 1);
	    marpatcl_rtc_fail_syscheck (p, LEX.g, res, "l0 alternative/b");
	}

	res = marpa_r_earleme_complete (LEX.recce);
	marpatcl_rtc_fail_syscheck (p, LEX.g, res, "l0 earleme_complete/b");
	// TODO marpatcl_process_events (p->l0, HANDLER, CDATA);







>
>







207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
    
    /* And feed the results into the new lexer recce */
    n = marpatcl_rtc_symset_size (ACCEPT);
    if (n) {
	int k;
	buf = marpatcl_rtc_symset_dense (ACCEPT);
	for (k=0; k < n; k++) {
	    TRACE (("ACCEPT [%d]: %d = %s", k, TO_ACS (buf[k]),
		    marpatcl_rtc_spec_symname (SPEC->l0, TO_ACS (buf[k]), 0)));
	    res = marpa_r_alternative (LEX.recce, TO_ACS (buf [k]), 1, 1);
	    marpatcl_rtc_fail_syscheck (p, LEX.g, res, "l0 alternative/b");
	}

	res = marpa_r_earleme_complete (LEX.recce);
	marpatcl_rtc_fail_syscheck (p, LEX.g, res, "l0 earleme_complete/b");
	// TODO marpatcl_process_events (p->l0, HANDLER, CDATA);
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305


306
307
308
309
310
311
312
313
314
315
316
317
	/* rule = lexeme rule id
	 * token = ACS symbol id (lexeme or discard)
	 * Range'L0: 256 ... 256+L+D-1
	 * Range'G1: 0 ... L+D-1
	 */
	
	token = TO_TERMINAL (token);
	TRACE (("rtc %p terminal %d", p, token));

	/* token = G1 terminal id (lexeme) or pseudo-terminal (discard)
	 * Range:   0 ... L+D-1
	 * Lexeme:  0 ... L-1
	 * Discard: L ... L+D-1
	 */

	ASSERT (token < (SPEC->lexemes+SPEC->discards), "pseudo-terminal out of bounds");
	if (token >= SPEC->lexemes) {
	    TRACE (("rtc %p discard pseudo-terminal", p));
	    discarded ++;
	    continue;
	}
	    


	if (marpatcl_rtc_symset_contains (FOUND, token)) {
	    TRACE (("rtc %p duplicate, skip", p));
	    continue;
	}
	TRACE (("rtc %p new, save", p));
	marpatcl_rtc_symset_include (FOUND, 1, &token);

	// SV deduplication: If the semantic codes are all for a value
	// independent of token/rule ids, a single SV is generated and used in
	// all the symbols. Otherwise an SV is generated for each unique
	// token.  Note, this will not catch a token with multiple rules, for
	// these only the first is captured and forwarded.







<









|



|
>
>




|







286
287
288
289
290
291
292

293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
	/* rule = lexeme rule id
	 * token = ACS symbol id (lexeme or discard)
	 * Range'L0: 256 ... 256+L+D-1
	 * Range'G1: 0 ... L+D-1
	 */
	
	token = TO_TERMINAL (token);


	/* token = G1 terminal id (lexeme) or pseudo-terminal (discard)
	 * Range:   0 ... L+D-1
	 * Lexeme:  0 ... L-1
	 * Discard: L ... L+D-1
	 */

	ASSERT (token < (SPEC->lexemes+SPEC->discards), "pseudo-terminal out of bounds");
	if (token >= SPEC->lexemes) {
	    TRACE (("rtc %p token %d, discard pseudo-terminal", p, token));
	    discarded ++;
	    continue;
	}

	TRACE (("rtc %p terminal %d (%s)", p, token, marpatcl_rtc_spec_symname (SPEC->g1, token, 0)));

	if (marpatcl_rtc_symset_contains (FOUND, token)) {
	    TRACE (("rtc %p duplicate, skip", p));
	    continue;
	}
	TRACE (("rtc %p token %d (%s) save", p, token, marpatcl_rtc_spec_symname (SPEC->g1, token, 0)));
	marpatcl_rtc_symset_include (FOUND, 1, &token);

	// SV deduplication: If the semantic codes are all for a value
	// independent of token/rule ids, a single SV is generated and used in
	// all the symbols. Otherwise an SV is generated for each unique
	// token.  Note, this will not catch a token with multiple rules, for
	// these only the first is captured and forwarded.