@@ -235,15 +235,54 @@ static inline PyObject *get_interned_dict(PyInterpreterState *interp)
235235 return _Py_INTERP_CACHED_OBJECT (interp , interned_strings );
236236}
237237
/* Shorthand for the interned-strings slot in the runtime-wide cached objects
   (_PyRuntime.cached_objects).  This is separate from the per-interpreter
   slot that get_interned_dict() reads via _Py_INTERP_CACHED_OBJECT().
   The code in this file uses it as a _Py_hashtable_t pointer that may be
   NULL before initialization and after teardown. */
238+ #define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings
239+
/* Return the number of interned strings: the entry count of the runtime-wide
   INTERNED_STRINGS hashtable plus the size of the interned dict belonging to
   the interpreter returned by _PyInterpreterState_GET().

   The '-' line below is the pre-patch body, which reported only the dict
   length; the '+' lines are the replacement that also counts the hashtable.

   NOTE(review): neither INTERNED_STRINGS nor the dict is checked for NULL
   here; presumably this is only called while the interned state is
   initialized -- confirm against callers. */
238240Py_ssize_t
239241_PyUnicode_InternedSize (void )
240242{
241- return PyObject_Length (get_interned_dict (_PyInterpreterState_GET ()));
243+ PyObject * dict = get_interned_dict (_PyInterpreterState_GET ());
244+ return _Py_hashtable_len (INTERNED_STRINGS ) + PyDict_GET_SIZE (dict );
245+ }
246+
247+ static Py_hash_t unicode_hash (PyObject * );
248+ static int unicode_compare_eq (PyObject * , PyObject * );
249+
/* Hash callback passed to _Py_hashtable_new_full() when the INTERNED_STRINGS
   table is created.  The key is treated as a PyObject pointer and hashed with
   unicode_hash(); that function's Py_hash_t result is converted to the
   Py_uhash_t return type. */
250+ static Py_uhash_t
251+ hashtable_unicode_hash (const void * key )
252+ {
253+ return unicode_hash ((PyObject * )key );
254+ }
255+
/* Key-comparison callback passed to _Py_hashtable_new_full() when the
   INTERNED_STRINGS table is created.  Both keys are treated as PyObject
   pointers.

   Returns the result of unicode_compare_eq() when both keys are non-NULL.
   Otherwise it falls back to pointer identity, so two NULL keys compare
   equal and a NULL key never matches a non-NULL one. */
256+ static int
257+ hashtable_unicode_compare (const void * key1 , const void * key2 )
258+ {
259+ PyObject * obj1 = (PyObject * )key1 ;
260+ PyObject * obj2 = (PyObject * )key2 ;
261+ if (obj1 != NULL && obj2 != NULL ) {
262+ return unicode_compare_eq (obj1 , obj2 );
263+ }
264+ else {
/* At least one key is NULL: compare by identity rather than content. */
265+ return obj1 == obj2 ;
266+ }
242267}
243268
244269static int
245270init_interned_dict (PyInterpreterState * interp )
246271{
272+ if (_Py_IsMainInterpreter (interp )) {
273+ assert (INTERNED_STRINGS == NULL );
274+ _Py_hashtable_allocator_t hashtable_alloc = {PyMem_RawMalloc , PyMem_RawFree };
275+ INTERNED_STRINGS = _Py_hashtable_new_full (
276+ hashtable_unicode_hash ,
277+ hashtable_unicode_compare ,
278+ NULL ,
279+ NULL ,
280+ & hashtable_alloc
281+ );
282+ if (INTERNED_STRINGS == NULL ) {
283+ return -1 ;
284+ }
285+ }
247286 assert (get_interned_dict (interp ) == NULL );
248287 PyObject * interned = interned = PyDict_New ();
249288 if (interned == NULL ) {
@@ -262,6 +301,10 @@ clear_interned_dict(PyInterpreterState *interp)
262301 Py_DECREF (interned );
263302 _Py_INTERP_CACHED_OBJECT (interp , interned_strings ) = NULL ;
264303 }
304+ if (_Py_IsMainInterpreter (interp ) && INTERNED_STRINGS != NULL ) {
305+ _Py_hashtable_destroy (INTERNED_STRINGS );
306+ INTERNED_STRINGS = NULL ;
307+ }
265308}
266309
267310#define _Py_RETURN_UNICODE_EMPTY () \
@@ -1222,6 +1265,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
12221265 _PyUnicode_STATE (unicode ).kind = kind ;
12231266 _PyUnicode_STATE (unicode ).compact = 1 ;
12241267 _PyUnicode_STATE (unicode ).ascii = is_ascii ;
1268+ _PyUnicode_STATE (unicode ).statically_allocated = 0 ;
12251269 if (is_ascii ) {
12261270 ((char * )data )[size ] = 0 ;
12271271 }
@@ -1552,7 +1596,9 @@ unicode_dealloc(PyObject *unicode)
15521596 * we accidentally decref an immortal string out of existence. Since
15531597 * the string is an immortal object, just re-set the reference count.
15541598 */
1555- if (PyUnicode_CHECK_INTERNED (unicode )) {
1599+ if (PyUnicode_CHECK_INTERNED (unicode )
1600+ || _PyUnicode_STATE (unicode ).statically_allocated )
1601+ {
15561602 _Py_SetImmortal (unicode );
15571603 return ;
15581604 }
@@ -14502,6 +14548,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
1450214548 _PyUnicode_STATE (self ).kind = kind ;
1450314549 _PyUnicode_STATE (self ).compact = 0 ;
1450414550 _PyUnicode_STATE (self ).ascii = _PyUnicode_STATE (unicode ).ascii ;
14551+ _PyUnicode_STATE (self ).statically_allocated = 0 ;
1450514552 _PyUnicode_UTF8_LENGTH (self ) = 0 ;
1450614553 _PyUnicode_UTF8 (self ) = NULL ;
1450714554 _PyUnicode_DATA_ANY (self ) = NULL ;
@@ -14725,6 +14772,23 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
1472514772 return ;
1472614773 }
1472714774
14775+ /* Look in the global cache first. */
14776+ PyObject * r = (PyObject * )_Py_hashtable_get (INTERNED_STRINGS , s );
14777+ if (r != NULL && r != s ) {
14778+ Py_SETREF (* p , Py_NewRef (r ));
14779+ return ;
14780+ }
14781+
14782+ /* Handle statically allocated strings. */
14783+ if (_PyUnicode_STATE (s ).statically_allocated ) {
14784+ assert (_Py_IsImmortal (s ));
14785+ if (_Py_hashtable_set (INTERNED_STRINGS , s , s ) == 0 ) {
14786+ _PyUnicode_STATE (* p ).interned = SSTATE_INTERNED_IMMORTAL_STATIC ;
14787+ }
14788+ return ;
14789+ }
14790+
14791+ /* Look in the per-interpreter cache. */
1472814792 PyObject * interned = get_interned_dict (interp );
1472914793 assert (interned != NULL );
1473014794
@@ -14740,9 +14804,11 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
1474014804 }
1474114805
1474214806 if (_Py_IsImmortal (s )) {
14807+ // XXX Restrict this to the main interpreter?
1474314808 _PyUnicode_STATE (* p ).interned = SSTATE_INTERNED_IMMORTAL_STATIC ;
14744- return ;
14809+ return ;
1474514810 }
14811+
1474614812#ifdef Py_REF_DEBUG
1474714813 /* The reference count value excluding the 2 references from the
1474814814 interned dictionary should be excluded from the RefTotal. The
0 commit comments