Tcl Source Code

Check-in [7f19c9f6b1]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:code review
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | tip-534-sebres-fast-number-hash
Files: files | file ages | folders
SHA3-256: 7f19c9f6b131746000176395ec4380d15f40feced2cfd9c1951afea0b750a31a
User & Date: sebres 2019-05-17 21:54:51
Context
2019-05-17
21:56
more test cases (covering vice versa situations: hashed as string in dict/var and search for integer... Leaf check-in: 53f9e431af user: sebres tags: tip-534-sebres-fast-number-hash
21:54
code review check-in: 7f19c9f6b1 user: sebres tags: tip-534-sebres-fast-number-hash
18:26
more clean-up, size_t-related consolidation (prepared for unsigned object length in 9.0) check-in: 9208e61ec9 user: sebres tags: tip-534-sebres-fast-number-hash
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclHash.c.

16
17
18
19
20
21
22











23
24
25
26
27
28
29
...
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
...
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
...
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
...
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
....
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
/*
 * Prevent macros from clashing with function definitions.
 */

#undef Tcl_FindHashEntry
#undef Tcl_CreateHashEntry












/*
 * When there are this many entries per bucket, on average, rebuild the hash
 * table to make it larger.
 */

#define REBUILD_MULTIPLIER	3

................................................................................
    const char *key,		/* Key to use to find or create matching
				 * entry. */
    int *newPtr)		/* Store info here telling whether a new entry
				 * was created. */
{
    register Tcl_HashEntry *hPtr;
    const Tcl_HashKeyType *typePtr;
    unsigned int hash;
    int index;

    if (tablePtr->keyType == TCL_STRING_KEYS) {
	typePtr = &tclStringHashKeyType;
    } else if (tablePtr->keyType == TCL_ONE_WORD_KEYS) {
	typePtr = &tclOneWordHashKeyType;
    } else if (tablePtr->keyType == TCL_CUSTOM_TYPE_KEYS
................................................................................
	hash = typePtr->hashKeyProc(tablePtr, (void *) key);
	if (typePtr->flags & TCL_HASH_KEY_RANDOMIZE_HASH) {
	    index = RANDOM_INDEX(tablePtr, hash);
	} else {
	    index = hash & tablePtr->mask;
	}
    } else {
	hash = PTR2UINT(key);
	index = RANDOM_INDEX(tablePtr, hash);
    }

    /*
     * Search all of the entries in the appropriate bucket.
     */

    if (typePtr->compareKeysProc) {
	Tcl_CompareHashKeysProc *compareKeysProc = typePtr->compareKeysProc;

	for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
		hPtr = hPtr->nextPtr) {
	    if (hash != PTR2UINT(hPtr->hash)) {
		continue;
	    }
	    if (((void *) key == hPtr) || compareKeysProc((void *) key, hPtr)) {
		if (newPtr) {
		    *newPtr = 0;
		}
		return hPtr;
	    }
	}
    } else {
	for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
		hPtr = hPtr->nextPtr) {
	    if (hash != PTR2UINT(hPtr->hash)) {
		continue;
	    }
	    if (key == hPtr->key.oneWordValue) {
		if (newPtr) {
		    *newPtr = 0;
		}
		return hPtr;
................................................................................
    } else {
	hPtr = ckalloc(sizeof(Tcl_HashEntry));
	hPtr->key.oneWordValue = (char *) key;
	hPtr->clientData = 0;
    }

    hPtr->tablePtr = tablePtr;
    hPtr->hash = UINT2PTR(hash);
    hPtr->nextPtr = tablePtr->buckets[index];
    tablePtr->buckets[index] = hPtr;
    tablePtr->numEntries++;

    /*
     * If the table has exceeded a decent size, rebuild it with many more
     * buckets.
................................................................................
	typePtr = &tclArrayHashKeyType;
    }

    if (typePtr->hashKeyProc == NULL
	    || typePtr->flags & TCL_HASH_KEY_RANDOMIZE_HASH) {
	index = RANDOM_INDEX(tablePtr, PTR2INT(entryPtr->hash));
    } else {
	index = PTR2UINT(entryPtr->hash) & tablePtr->mask;
    }

    bucketPtr = &tablePtr->buckets[index];

    if (*bucketPtr == entryPtr) {
	*bucketPtr = entryPtr->nextPtr;
    } else {
................................................................................
    for (oldChainPtr = oldBuckets; oldSize > 0; oldSize--, oldChainPtr++) {
	for (hPtr = *oldChainPtr; hPtr != NULL; hPtr = *oldChainPtr) {
	    *oldChainPtr = hPtr->nextPtr;
	    if (typePtr->hashKeyProc == NULL
		    || typePtr->flags & TCL_HASH_KEY_RANDOMIZE_HASH) {
		index = RANDOM_INDEX(tablePtr, PTR2INT(hPtr->hash));
	    } else {
		index = PTR2UINT(hPtr->hash) & tablePtr->mask;
	    }
	    hPtr->nextPtr = tablePtr->buckets[index];
	    tablePtr->buckets[index] = hPtr;
	}
    }

    /*






>
>
>
>
>
>
>
>
>
>
>







 







|







 







|












|












|







 







|







 







|







 







|







16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
...
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
...
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
...
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
...
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
....
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
/*
 * Prevent macros from clashing with function definitions.
 */

#undef Tcl_FindHashEntry
#undef Tcl_CreateHashEntry


#if !defined(HASH2PTR) && !defined(PTR2HASH)
#   if defined(HAVE_UINTPTR_T) || defined(uintptr_t)
#	define HASH2PTR(p) ((void *)(uintptr_t)(p))
#	define PTR2HASH(p) ((TCL_HASH_TYPE)(uintptr_t)(p))
#   else
#	define HASH2PTR(p) ((void *)(p))
#	define PTR2HASH(p) ((TCL_HASH_TYPE)(p))
#   endif
#endif

/*
 * When there are this many entries per bucket, on average, rebuild the hash
 * table to make it larger.
 */

#define REBUILD_MULTIPLIER	3

................................................................................
    const char *key,		/* Key to use to find or create matching
				 * entry. */
    int *newPtr)		/* Store info here telling whether a new entry
				 * was created. */
{
    register Tcl_HashEntry *hPtr;
    const Tcl_HashKeyType *typePtr;
    TCL_HASH_TYPE hash;
    int index;

    if (tablePtr->keyType == TCL_STRING_KEYS) {
	typePtr = &tclStringHashKeyType;
    } else if (tablePtr->keyType == TCL_ONE_WORD_KEYS) {
	typePtr = &tclOneWordHashKeyType;
    } else if (tablePtr->keyType == TCL_CUSTOM_TYPE_KEYS
................................................................................
	hash = typePtr->hashKeyProc(tablePtr, (void *) key);
	if (typePtr->flags & TCL_HASH_KEY_RANDOMIZE_HASH) {
	    index = RANDOM_INDEX(tablePtr, hash);
	} else {
	    index = hash & tablePtr->mask;
	}
    } else {
	hash = PTR2HASH(key);
	index = RANDOM_INDEX(tablePtr, hash);
    }

    /*
     * Search all of the entries in the appropriate bucket.
     */

    if (typePtr->compareKeysProc) {
	Tcl_CompareHashKeysProc *compareKeysProc = typePtr->compareKeysProc;

	for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
		hPtr = hPtr->nextPtr) {
	    if (hash != PTR2HASH(hPtr->hash)) {
		continue;
	    }
	    if (((void *) key == hPtr) || compareKeysProc((void *) key, hPtr)) {
		if (newPtr) {
		    *newPtr = 0;
		}
		return hPtr;
	    }
	}
    } else {
	for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
		hPtr = hPtr->nextPtr) {
	    if (hash != PTR2HASH(hPtr->hash)) {
		continue;
	    }
	    if (key == hPtr->key.oneWordValue) {
		if (newPtr) {
		    *newPtr = 0;
		}
		return hPtr;
................................................................................
    } else {
	hPtr = ckalloc(sizeof(Tcl_HashEntry));
	hPtr->key.oneWordValue = (char *) key;
	hPtr->clientData = 0;
    }

    hPtr->tablePtr = tablePtr;
    hPtr->hash = HASH2PTR(hash);
    hPtr->nextPtr = tablePtr->buckets[index];
    tablePtr->buckets[index] = hPtr;
    tablePtr->numEntries++;

    /*
     * If the table has exceeded a decent size, rebuild it with many more
     * buckets.
................................................................................
	typePtr = &tclArrayHashKeyType;
    }

    if (typePtr->hashKeyProc == NULL
	    || typePtr->flags & TCL_HASH_KEY_RANDOMIZE_HASH) {
	index = RANDOM_INDEX(tablePtr, PTR2INT(entryPtr->hash));
    } else {
	index = PTR2HASH(entryPtr->hash) & tablePtr->mask;
    }

    bucketPtr = &tablePtr->buckets[index];

    if (*bucketPtr == entryPtr) {
	*bucketPtr = entryPtr->nextPtr;
    } else {
................................................................................
    for (oldChainPtr = oldBuckets; oldSize > 0; oldSize--, oldChainPtr++) {
	for (hPtr = *oldChainPtr; hPtr != NULL; hPtr = *oldChainPtr) {
	    *oldChainPtr = hPtr->nextPtr;
	    if (typePtr->hashKeyProc == NULL
		    || typePtr->flags & TCL_HASH_KEY_RANDOMIZE_HASH) {
		index = RANDOM_INDEX(tablePtr, PTR2INT(hPtr->hash));
	    } else {
		index = PTR2HASH(hPtr->hash) & tablePtr->mask;
	    }
	    hPtr->nextPtr = tablePtr->buckets[index];
	    tablePtr->buckets[index] = hPtr;
	}
    }

    /*

Changes to generic/tclObj.c.

4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
....
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224

4225



4226






4227




4228
4229
4230
4231
4232
4233








4234
4235




4236
4237
4238
4239
4240




4241


4242

4243
4244
4245
4246


4247

4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260


4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
    hPtr->key.objPtr = objPtr;
    Tcl_IncrRefCount(objPtr);
    hPtr->clientData = NULL;

    return hPtr;
}
 
static inline int
IntObjIsCanonical(
    Tcl_Obj *objPtr) 
{
    const char *p = objPtr->bytes;

    /* 
     * Canonical integers are:
................................................................................
int
TclCompareObjKeys(
    void *keyPtr,		/* New key to compare. */
    Tcl_HashEntry *hPtr)	/* Existing key to compare. */
{
    register Tcl_Obj *objPtr1 = keyPtr;
    register Tcl_Obj *objPtr2 = hPtr->key.objPtr;
    register const char *p1, *p2;
    register size_t l1, l2;

    /*
     * If the object pointers are the same then they match.
     * OPT: this comparison was moved to the caller

       if (objPtr1 == objPtr2) return 1;
    */


    /* Optimisation for comparing integer objects */










    if (objPtr1->typePtr == &tclIntType && objPtr2->typePtr == &tclIntType) {




	if (objPtr1->internalRep.wideValue != objPtr2->internalRep.wideValue) {
	    return 0;
	}
	/* 
	 * Integers are equal, so check it is canonical ...
	 */









	if (IntObjIsCanonical(objPtr1) && IntObjIsCanonical(objPtr2)) {




	    return 1;
	}

	/*
	 * Compare those string representations.




	 */


    }


    /*
     * Don't use Tcl_GetStringFromObj as it would prevent l1 and l2 being
     * in a register.


     */


    p1 = TclGetString(objPtr1);
    l1 = objPtr1->length;
    p2 = TclGetString(objPtr2);
    l2 = objPtr2->length;

    /*
     * Only compare string representations of the same length.
     */

    if (l1 != l2) {
        return 0;
    } else {


	if (!l1) { /* empty string are equal */
	    return 1;
	}

	/* compare both strings */

	do {
	    if (*p1++ != *p2++) {
		return 0;
	    }
	} while (--l1);
    }
    return 1;






|







 







<









>
|
>
>
>

>
>
>
>
>
>

>
>
>
>




|

>
>
>
>
>
>
>
>
|
<
>
>
>
>
|
|
|
|
<
>
>
>
>
|
>
>
|
>


<
<
>
>

>

<

<









>
>





|







4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
....
4208
4209
4210
4211
4212
4213
4214

4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255

4256
4257
4258
4259
4260
4261
4262
4263

4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274


4275
4276
4277
4278
4279

4280

4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
    hPtr->key.objPtr = objPtr;
    Tcl_IncrRefCount(objPtr);
    hPtr->clientData = NULL;

    return hPtr;
}
 
static inline unsigned char
IntObjIsCanonical(
    Tcl_Obj *objPtr) 
{
    const char *p = objPtr->bytes;

    /* 
     * Canonical integers are:
................................................................................
int
TclCompareObjKeys(
    void *keyPtr,		/* New key to compare. */
    Tcl_HashEntry *hPtr)	/* Existing key to compare. */
{
    register Tcl_Obj *objPtr1 = keyPtr;
    register Tcl_Obj *objPtr2 = hPtr->key.objPtr;

    register size_t l1, l2;

    /*
     * If the object pointers are the same then they match.
     * OPT: this comparison was moved to the caller

       if (objPtr1 == objPtr2) return 1;
    */

    /* 
     * Optimisation for comparing integer objects 
     * This allows also to avoid generation of string representation, so
     * saves memory consumption (CPU-cache washout, branch mispredictions, etc)
     */

    /* If both are not integer - compare strings */
    if (objPtr1->typePtr != &tclIntType && objPtr2->typePtr != &tclIntType) {
	goto stringCompare;
    }

    /* If both are integer */
    if (objPtr1->typePtr == &tclIntType && objPtr2->typePtr == &tclIntType) {
	unsigned char c1, c2;
	/*
	 * Check integers are equal ...
	 */
	if (objPtr1->internalRep.wideValue != objPtr2->internalRep.wideValue) {
	    return 0;
	}
	/* 
	 * Integers are equal, so check they are canonical ...
	 */
	c1 = IntObjIsCanonical(objPtr1);
	c2 = IntObjIsCanonical(objPtr2);
	if (!(c1 | c2)) {
	    /* 
	     * both are not canonical: compare string representations
	     * (which is already available)
	     */
	    goto stringCompare;
	}

	/* 
	 * both are canonical, so equal - return 1, 
	 * or one of them is not, then not equal - return 0
	 */
	return (c1 & c2);
    }
    
    /* 

     * One of them is not an integer so compare string representations.
     *
     * Ensure we have both strings (don't use Tcl_GetStringFromObj as
     * it would prevent l1 and l2 being in a register).
     */
    TclGetString(objPtr1);
    TclGetString(objPtr2);

stringCompare:

    /*


     * Normally we don't need to get strings, because if it is expected, it is
     * already done in TclHashObjKey or above, so simply protect by assert here
     */
    assert(objPtr1->bytes && objPtr2->bytes);


    l1 = objPtr1->length;

    l2 = objPtr2->length;

    /*
     * Only compare string representations of the same length.
     */

    if (l1 != l2) {
        return 0;
    } else {
    	register const char *p1, *p2;

	if (!l1) { /* empty string are equal */
	    return 1;
	}

	/* compare both strings */
	p1 = objPtr1->bytes; p2 = objPtr2->bytes;
	do {
	    if (*p1++ != *p2++) {
		return 0;
	    }
	} while (--l1);
    }
    return 1;