From c40b26d5e23537aca1d266592df04fa15e338902 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 7 Nov 2024 00:56:09 -0800 Subject: [PATCH 1/7] gh-117378: Fix multiprocessing forkserver preload sys.path inheritance. `sys.path` was not properly being sent from the parent process when launching the multiprocessing forkserver process to preload imports. This bug has been there since the forkserver start method was introduced in Python ~3.4. It was always _supposed_ to inherit `sys.path` the same way the spawn method does. Observable behavior change: A `''` value in `sys.path` will now be replaced in the forkserver's `sys.path` with an absolute pathname `os.path.abspath(os.getcwd())` saved at the time that `multiprocessing` was imported in the parent process as it already was when using the spawn start method. A workaround for the bug this fixes was to set PYTHONPATH in the environment before the forkserver process was started. --- Lib/multiprocessing/forkserver.py | 2 + .../test_forkserver_main.py | 57 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 Lib/test/test_multiprocessing_forkserver/test_forkserver_main.py diff --git a/Lib/multiprocessing/forkserver.py b/Lib/multiprocessing/forkserver.py index 53b8c492675878..90cc04a8346379 100644 --- a/Lib/multiprocessing/forkserver.py +++ b/Lib/multiprocessing/forkserver.py @@ -174,6 +174,8 @@ def main(listener_fd, alive_r, preload, main_path=None, sys_path=None): spawn.import_main_path(main_path) finally: del process.current_process()._inheriting + if sys_path is not None: + sys.path[:] = sys_path for modname in preload: try: __import__(modname) diff --git a/Lib/test/test_multiprocessing_forkserver/test_forkserver_main.py b/Lib/test/test_multiprocessing_forkserver/test_forkserver_main.py new file mode 100644 index 00000000000000..1f4ee3ab47214c --- /dev/null +++ b/Lib/test/test_multiprocessing_forkserver/test_forkserver_main.py @@ -0,0 +1,57 @@ +import os +import sys +import unittest +from 
unittest import mock + +from multiprocessing import forkserver + + +class TestForkserverMain(unittest.TestCase): + + def setUp(self): + self._orig_sys_path = list(sys.path) + + def tearDown(self): + sys.path[:] = self._orig_sys_path + + @mock.patch("multiprocessing.process.current_process") + @mock.patch("multiprocessing.spawn.import_main_path") + @mock.patch("multiprocessing.util._close_stdin") + def test_preload_kwargs( + self, + mock_close_stdin, + mock_import_main_path, + mock_current_process, + ): + # Very much a whitebox test of the first stanza of main before + # we start diddling with file descriptors and pipes. + mock_close_stdin.side_effect = RuntimeError("stop test") + self.assertNotIn( + "colorsys", + sys.modules.keys(), + msg="Thie test requires a module that has not yet been imported.", + ) + + with self.assertRaisesRegex(RuntimeError, "stop test"): + forkserver.main(None, None, ["sys", "colorsys"]) + mock_current_process.assert_not_called() + mock_import_main_path.assert_not_called() + self.assertIn("colorsys", sys.modules.keys()) + self.assertEqual(sys.path, self._orig_sys_path) # unmodified + + del sys.modules["colorsys"] # unimport + fake_path = os.path.dirname(__file__) + with self.assertRaisesRegex(RuntimeError, "stop test"): + forkserver.main(None, None, ["sys", "colorsys"], sys_path=[fake_path]) + self.assertEqual( + sys.path, [fake_path], msg="sys.path should have been overridden" + ) + self.assertNotIn( + "colorsys", + sys.modules.keys(), + msg="import of colorsys should have failed with unusual sys.path", + ) + + +if __name__ == "__main__": + unittest.main() From 702e0b7428f0a1eeaabd766d6a440bee5b3d4f14 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Thu, 7 Nov 2024 01:40:14 -0800 Subject: [PATCH 2/7] NEWS entry --- .../2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst diff --git a/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst b/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst new file mode 100644 index 00000000000000..22b7e720eba7ae --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst @@ -0,0 +1,11 @@ +Fixed the :mod:`multiprocessing` ``"forkserver"`` start method forkserver +process was to correctly inherit the parent's :data:`sys.path` during the +importing of :func:`multiprocessing.set_forkserver_preload` modules in the +same manner as :data:`sys.path` is configured when executing work items in +the worker processes. + +This bug could cause some forkserver module preloading to silently fail to +be preloaded, leading to a performance degration in child processes due to +additional repeated work. It could also have led to a side effect of ``""`` +still being in :data:`sys.path` during forkserver preload imports instead of +the absolute path of the directory that workers see. From 7ad114f986e6e1f483d952c099d753c9af6b5d69 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 7 Nov 2024 22:31:09 -0800 Subject: [PATCH 3/7] move sys.path setting up per review. 
--- Lib/multiprocessing/forkserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/multiprocessing/forkserver.py b/Lib/multiprocessing/forkserver.py index 90cc04a8346379..bff7fb91d974b3 100644 --- a/Lib/multiprocessing/forkserver.py +++ b/Lib/multiprocessing/forkserver.py @@ -168,14 +168,14 @@ def ensure_running(self): def main(listener_fd, alive_r, preload, main_path=None, sys_path=None): '''Run forkserver.''' if preload: + if sys_path is not None: + sys.path[:] = sys_path if '__main__' in preload and main_path is not None: process.current_process()._inheriting = True try: spawn.import_main_path(main_path) finally: del process.current_process()._inheriting - if sys_path is not None: - sys.path[:] = sys_path for modname in preload: try: __import__(modname) From cf07467ac684aff2f7f70c6a6836a8530264f8e8 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 9 Nov 2024 00:11:16 -0800 Subject: [PATCH 4/7] Redo the test as a sys.path integration test. --- Lib/test/_test_multiprocessing.py | 77 +++++++++++++++++++ .../test_forkserver_main.py | 57 -------------- 2 files changed, 77 insertions(+), 57 deletions(-) delete mode 100644 Lib/test/test_multiprocessing_forkserver/test_forkserver_main.py diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 77b618c684475a..9d3f2ee5f45aab 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -12,6 +12,7 @@ import sys import os import gc +import importlib import errno import functools import signal @@ -20,8 +21,10 @@ import socket import random import logging +import shutil import subprocess import struct +import tempfile import operator import pickle import weakref @@ -6275,6 +6278,80 @@ def test_atexit(self): self.assertEqual(f.read(), 'deadbeef') + +class _TestSpawnedSysPath(BaseTestCase): + """Test that sys.path is set up in forkserver and spawn processes.""" + + ALLOWED_TYPES = ('processes',) + + def setUp(self): +
self._orig_sys_path = list(sys.path) + self._temp_dir = tempfile.mkdtemp(prefix="test_sys_path-") + self._mod_name = "unique_test_mod" + module_path = os.path.join(self._temp_dir, f"{self._mod_name}.py") + with open(module_path, "w", encoding="utf-8") as mod: + mod.write("# A simple test module\n") + sys.path[:] = [p for p in sys.path if p] # remove any existing ""s + sys.path.insert(0, self._temp_dir) + sys.path.insert(0, "") # Replaced with an abspath in child. + try: + self._ctx_forkserver = multiprocessing.get_context("forkserver") + except ValueError: + self._ctx_forkserver = None + self._ctx_spawn = multiprocessing.get_context("spawn") + + def tearDown(self): + sys.path[:] = self._orig_sys_path + shutil.rmtree(self._temp_dir, ignore_errors=True) + + @staticmethod + def enq_imported_module_names(queue): + queue.put(tuple(sys.modules)) + + def test_forkserver_preload_imports_sys_path(self): + if not (ctx := self._ctx_forkserver): + self.skipTest("requires forkserver start method.") + self.assertNotIn(self._mod_name, sys.modules) + multiprocessing.forkserver._forkserver._stop() # Must be fresh. 
+ ctx.set_forkserver_preload( + ["test.test_multiprocessing_forkserver", self._mod_name]) + q = ctx.Queue() + proc = ctx.Process(target=self.enq_imported_module_names, args=(q,)) + proc.start() + proc.join() + child_imported_modules = q.get() + q.close() + self.assertIn(self._mod_name, child_imported_modules) + + @staticmethod + def enq_sys_path_and_import(queue, mod_name): + queue.put(sys.path) + try: + importlib.import_module(mod_name) + except ImportError as exc: + queue.put(exc) + else: + queue.put(None) + + def test_child_sys_path(self): + for ctx in (self._ctx_spawn, self._ctx_forkserver): + if not ctx: + continue + with self.subTest(f"{ctx.get_start_method()} start method"): + q = ctx.Queue() + proc = ctx.Process(target=self.enq_sys_path_and_import, + args=(q, self._mod_name)) + proc.start() + proc.join() + child_sys_path = q.get() + import_error = q.get() + q.close() + self.assertNotIn("", child_sys_path) # replaced by an abspath + self.assertIn(self._temp_dir, child_sys_path) # our addition + # ignore the first element, it is the absolute "" replacement + self.assertEqual(child_sys_path[1:], sys.path[1:]) + self.assertIsNone(import_error, msg=f"child could not import {self._mod_name}") + + class MiscTestCase(unittest.TestCase): def test__all__(self): # Just make sure names in not_exported are excluded diff --git a/Lib/test/test_multiprocessing_forkserver/test_forkserver_main.py b/Lib/test/test_multiprocessing_forkserver/test_forkserver_main.py deleted file mode 100644 index 1f4ee3ab47214c..00000000000000 --- a/Lib/test/test_multiprocessing_forkserver/test_forkserver_main.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -import sys -import unittest -from unittest import mock - -from multiprocessing import forkserver - - -class TestForkserverMain(unittest.TestCase): - - def setUp(self): - self._orig_sys_path = list(sys.path) - - def tearDown(self): - sys.path[:] = self._orig_sys_path - - @mock.patch("multiprocessing.process.current_process") - 
@mock.patch("multiprocessing.spawn.import_main_path") - @mock.patch("multiprocessing.util._close_stdin") - def test_preload_kwargs( - self, - mock_close_stdin, - mock_import_main_path, - mock_current_process, - ): - # Very much a whitebox test of the first stanza of main before - # we start diddling with file descriptors and pipes. - mock_close_stdin.side_effect = RuntimeError("stop test") - self.assertNotIn( - "colorsys", - sys.modules.keys(), - msg="Thie test requires a module that has not yet been imported.", - ) - - with self.assertRaisesRegex(RuntimeError, "stop test"): - forkserver.main(None, None, ["sys", "colorsys"]) - mock_current_process.assert_not_called() - mock_import_main_path.assert_not_called() - self.assertIn("colorsys", sys.modules.keys()) - self.assertEqual(sys.path, self._orig_sys_path) # unmodified - - del sys.modules["colorsys"] # unimport - fake_path = os.path.dirname(__file__) - with self.assertRaisesRegex(RuntimeError, "stop test"): - forkserver.main(None, None, ["sys", "colorsys"], sys_path=[fake_path]) - self.assertEqual( - sys.path, [fake_path], msg="sys.path should have been overridden" - ) - self.assertNotIn( - "colorsys", - sys.modules.keys(), - msg="import of colorsys should have failed with unusual sys.path", - ) - - -if __name__ == "__main__": - unittest.main() From 4fe3389baec4f4e924a745bcb94ae55478588323 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Sat, 9 Nov 2024 13:46:54 -0800 Subject: [PATCH 5/7] avoid := for readability Co-authored-by: Serhiy Storchaka --- Lib/test/_test_multiprocessing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 9d3f2ee5f45aab..198d1d5fc1494c 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -6308,7 +6308,8 @@ def enq_imported_module_names(queue): queue.put(tuple(sys.modules)) def test_forkserver_preload_imports_sys_path(self): - if not (ctx := self._ctx_forkserver): + ctx = self._ctx_forkserver + if not ctx: self.skipTest("requires forkserver start method.") self.assertNotIn(self._mod_name, sys.modules) multiprocessing.forkserver._forkserver._stop() # Must be fresh. From 3f486f8c2a949040c8944b9308d4a114d684181e Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 9 Nov 2024 14:21:44 -0800 Subject: [PATCH 6/7] reword NEWS --- ...-11-07-01-40-11.gh-issue-117378.o9O5uM.rst | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst b/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst index 22b7e720eba7ae..ee3db4bd942293 100644 --- a/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst +++ b/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst @@ -1,11 +1,17 @@ Fixed the :mod:`multiprocessing` ``"forkserver"`` start method forkserver -process was to correctly inherit the parent's :data:`sys.path` during the -importing of :func:`multiprocessing.set_forkserver_preload` modules in the -same manner as :data:`sys.path` is configured when executing work items in -the worker processes. - -This bug could cause some forkserver module preloading to silently fail to -be preloaded, leading to a performance degration in child processes due to -additional repeated work. 
It could also have led to a side effect of ``""`` -still being in :data:`sys.path` during forkserver preload imports instead of -the absolute path of the directory that workers see. +process to correctly inherit the parent's :data:`sys.path` during the importing +of :func:`multiprocessing.set_forkserver_preload` modules in the same manner as +:data:`sys.path` is configured in workers before executing work items. + +This bug caused some forkserver module preloading to silently fail to preload. +This manifested as a performance degradation in child processes when the +`sys.path` was required due to additional repeated work in every worker. + +It could also have a side effect of ``""`` remaining in :data:`sys.path` during +forkserver preload imports instead of the absolute path from :func:`os.getcwd` +at multiprocessing import time used in the worker ``sys.path``. + +This could potentially lead to incorrect imports from the wrong location during +preload. We are unaware of that actually happening. The issue was discovered +by someone observing unexpected preload performance gains. + From 2444519c4959bf6b0014cc31a289c826aacefaef Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 9 Nov 2024 14:30:27 -0800 Subject: [PATCH 7/7] news sphinx-lint fix --- .../next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst b/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst index ee3db4bd942293..cdbe21f9f9a663 100644 --- a/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst +++ b/Misc/NEWS.d/next/Library/2024-11-07-01-40-11.gh-issue-117378.o9O5uM.rst @@ -5,7 +5,7 @@ of :func:`multiprocessing.set_forkserver_preload` modules in the same manner as This bug caused some forkserver module preloading to silently fail to preload.
This manifested as a performance degradation in child processes when the -`sys.path` was required due to additional repeated work in every worker. +``sys.path`` was required due to additional repeated work in every worker. It could also have a side effect of ``""`` remaining in :data:`sys.path` during forkserver preload imports instead of the absolute path from :func:`os.getcwd`