On Windows, when Python 2’s subprocess module launches a process with a unicode command line containing characters outside the currently active ANSI code page (i.e. the mbcs encoding), the command line is mangled: every character that cannot be encoded by mbcs is replaced with ?.

Obviously, this can be resolved by switching to Python 3, but sometimes converting to Python 3 is not yet an option. A terrifying prospect in 2018, but a problem nonetheless.

I present the module uniprocess, which defines its custom version of Popen and friends to work around the problem. I hope it proves useful to you.

import sys
import subprocess

import six

if six.PY2 and sys.platform == 'win32':
    import _subprocess
    from types import FunctionType, CodeType

    from ctypes import byref, windll, c_void_p, Structure, sizeof, c_wchar, WinError, POINTER
    from ctypes.wintypes import BYTE, WORD, LPWSTR, BOOL, DWORD, LPVOID, HANDLE

    # Process creation flag OR-ed into dwCreationFlags so that the environment
    # block handed to CreateProcessW is interpreted as wide characters.
    CREATE_UNICODE_ENVIRONMENT = 0x00000400
    # The security-attribute arguments are only ever passed as NULL in this
    # module, so an opaque pointer type is sufficient.
    LPSECURITY_ATTRIBUTES = c_void_p
    # Pointer-to-BYTE, used for the lpReserved2 field of STARTUPINFOW.
    LPBYTE = POINTER(BYTE)


    class STARTUPINFOW(Structure):
        """ctypes layout of the STARTUPINFOW structure given to CreateProcessW.

        The order and types of the fields define the binary layout, so they
        must not be rearranged.
        """

        _fields_ = [
            ('cb', DWORD),
            ('lpReserved', LPWSTR),
            ('lpDesktop', LPWSTR),
            ('lpTitle', LPWSTR),
            ('dwX', DWORD),
            ('dwY', DWORD),
            ('dwXSize', DWORD),
            ('dwYSize', DWORD),
            ('dwXCountChars', DWORD),
            ('dwYCountChars', DWORD),
            # Spelling (sic) is kept so existing attribute access keeps working.
            ('dwFillAtrribute', DWORD),
            ('dwFlags', DWORD),
            ('wShowWindow', WORD),
            ('cbReserved2', WORD),
            ('lpReserved2', LPBYTE),
            ('hStdInput', HANDLE),
            ('hStdOutput', HANDLE),
            ('hStdError', HANDLE),
        ]


    LPSTARTUPINFOW = POINTER(STARTUPINFOW)


    class PROCESS_INFORMATION(Structure):
        """ctypes layout of the PROCESS_INFORMATION structure filled in by CreateProcessW.

        Holds the new process/thread handles and their numeric identifiers.
        """

        _fields_ = [
            ('hProcess', HANDLE),
            ('hThread', HANDLE),
            ('dwProcessId', DWORD),
            ('dwThreadId', DWORD),
        ]


    LPPROCESS_INFORMATION = POINTER(PROCESS_INFORMATION)


    class WindowsHandle(c_void_p):
        """Emulate the handle objects in _subprocess.

        Wraps a raw Win32 HANDLE value and owns it: the handle is closed by
        ``Close()`` or, like the native ``_subprocess`` handle objects, when
        the wrapper is garbage collected. Use ``Detach()`` to take the raw
        value out without closing it.
        """

        def __init__(self, *a, **kw):
            super(WindowsHandle, self).__init__(*a, **kw)
            # True once the handle has been closed or detached.
            self.closed = False

        def Close(self):
            """Close the underlying handle; repeated calls are no-ops."""
            if not self.closed:
                # A NULL handle has nothing to close.
                if self.value is not None:
                    windll.kernel32.CloseHandle(self)
                self.closed = True

        def Detach(self):
            """Release ownership and return the raw handle value as an integer.

            After detaching, neither ``Close()`` nor garbage collection will
            close the handle; the caller becomes responsible for it.
            """
            raw = int(self)
            self.closed = True
            return raw

        def __int__(self):
            # c_void_p reports a NULL pointer as None, which int() cannot
            # return; map it to 0 instead of raising TypeError.
            return self.value or 0

        def __del__(self):
            # Without this the process handle kept by Popen would leak once
            # the Popen object is collected.
            try:
                if not getattr(self, 'closed', True):
                    self.Close()
            except Exception:
                # Destructors may run during interpreter shutdown, when module
                # globals such as ``windll`` are already torn down.
                pass


    # Using LoadLibrary to avoid our argtypes conflicting.
    # (The prototype below is attached to this separately loaded library
    # object rather than to the shared ``windll.kernel32`` one.)
    CreateProcessW = windll.LoadLibrary('kernel32.dll').CreateProcessW
    # Argument order follows the Win32 prototype:
    #   lpApplicationName, lpCommandLine, lpProcessAttributes,
    #   lpThreadAttributes, bInheritHandles, dwCreationFlags, lpEnvironment,
    #   lpCurrentDirectory, lpStartupInfo, lpProcessInformation
    CreateProcessW.argtypes = [
        LPWSTR, LPWSTR, LPSECURITY_ATTRIBUTES,
        LPSECURITY_ATTRIBUTES, BOOL, DWORD, LPVOID, LPWSTR,
        LPSTARTUPINFOW, LPPROCESS_INFORMATION,
    ]
    # Non-zero on success, zero on failure.
    CreateProcessW.restype = BOOL


    def utf8text(maybe_bytes, errors='strict'):
        """Return *maybe_bytes* as text, decoding byte strings as UTF-8.

        ``None`` and values that are already text are returned unchanged.
        *errors* is passed through to ``decode`` as the error handler.
        """
        already_usable = (maybe_bytes is None
                          or isinstance(maybe_bytes, six.text_type))
        if already_usable:
            return maybe_bytes
        return maybe_bytes.decode('utf-8', errors)


    def CreateProcess(executable, args, _p_attr, _t_attr,
                      inherit_handles, creation_flags, env, cwd,
                      startup_info):
        """Unicode-aware stand-in for ``_subprocess.CreateProcess``.

        Starts a process through ``CreateProcessW`` so that the command line,
        environment and working directory are passed as wide strings.

        Parameters:
            executable: application name, or None.
            args: command line string, or None.
            _p_attr, _t_attr: accepted for signature compatibility and
                ignored; NULL security attributes are always used.
            inherit_handles: whether the child inherits inheritable handles.
            creation_flags: process creation flags;
                CREATE_UNICODE_ENVIRONMENT is always added.
            env: mapping of environment variables, or None to inherit.
            cwd: working directory (text or UTF-8 bytes), or None.
            startup_info: object providing dwFlags, wShowWindow, hStdInput,
                hStdOutput and hStdError.

        Returns:
            ``(process_handle, thread_handle, pid, tid)`` where the handles
            are WindowsHandle instances.

        Raises:
            The exception built by ``WinError()`` when CreateProcessW fails.
        """
        def int_or_none(handle):
            return None if handle is None else int(handle)

        si = STARTUPINFOW(
            dwFlags=startup_info.dwFlags,
            wShowWindow=startup_info.wShowWindow,
            cb=sizeof(STARTUPINFOW),
            hStdInput=int_or_none(startup_info.hStdInput),
            hStdOutput=int_or_none(startup_info.hStdOutput),
            hStdError=int_or_none(startup_info.hStdError),
        )

        # CreateProcessW is documented as being allowed to modify the command
        # line in place, so give it a private, writable, NUL-terminated buffer
        # instead of a pointer into an immutable Python string.
        cmdline = None
        if args is not None:
            cmdline = (c_wchar * (len(args) + 1))()
            cmdline.value = args

        wenv = None
        if env is not None:
            block = u''.join(u'%s=%s\0' % (k, v) for k, v in env.items()) + u'\0'
            if len(block) < 2:
                # An environment block must end with two NULs, even when it
                # contains no variables at all.
                block = u'\0\0'
            wenv = (c_wchar * len(block))()
            wenv.value = block

        pi = PROCESS_INFORMATION()
        # The environment block above is always built from wide characters.
        creation_flags |= CREATE_UNICODE_ENVIRONMENT

        if CreateProcessW(executable, cmdline, None, None,
                          inherit_handles, creation_flags,
                          wenv, utf8text(cwd), byref(si), byref(pi)):
            return (WindowsHandle(pi.hProcess), WindowsHandle(pi.hThread),
                    pi.dwProcessId, pi.dwThreadId)
        raise WinError()


    class FakeSubprocess(object):
        """Proxy for ``_subprocess`` that substitutes this module's CreateProcess."""

        def __getattribute__(self, item):
            # Everything except CreateProcess is forwarded to the real module.
            if item != 'CreateProcess':
                return getattr(_subprocess, item)
            return CreateProcess


    def replace_globals(function, new_globals):
        """Return a copy of *function* that sees overridden global names.

        The copy shares the original's code object, name, default arguments
        and closure, but looks up globals in a copy of the original namespace
        updated with *new_globals*. The original function and its globals are
        left untouched.

        The dunder attributes are used instead of the ``func_*`` aliases
        because they exist on Python 2.6+ as well as Python 3.
        """
        func_globals = dict(function.__globals__)
        func_globals.update(new_globals)
        return FunctionType(function.__code__, func_globals, function.__name__,
                            function.__defaults__, function.__closure__)


    def replace_consts(function, new_consts):
        """Replace constants in *function*'s code object, in place.

        Every entry of ``co_consts`` that is a key of *new_consts* (matched by
        ordinary dict lookup, i.e. hash and equality) is replaced by the
        mapped value; all other constants are kept. The function is modified
        in place and nothing is returned.
        """
        code = function.__code__
        consts = tuple(new_consts.get(const, const) for const in code.co_consts)
        if hasattr(code, 'replace'):
            # Python 3.8+: the CodeType constructor signature differs between
            # versions, so let the code object clone itself.
            new_code = code.replace(co_consts=consts)
        else:
            # Python 2 constructor signature.
            new_code = CodeType(code.co_argcount, code.co_nlocals, code.co_stacksize, code.co_flags,
                                code.co_code, consts, code.co_names, code.co_varnames, code.co_filename,
                                code.co_name, code.co_firstlineno, code.co_lnotab, code.co_freevars,
                                code.co_cellvars)
        function.__code__ = new_code


    class Popen(subprocess.Popen):
        """``subprocess.Popen`` variant whose child is started via CreateProcessW.

        Only ``_execute_child`` is overridden; it is a patched copy of the
        stock implementation rather than a rewrite.
        """

        # Copy the stock _execute_child, but make the global name
        # ``_subprocess`` resolve to a FakeSubprocess so that the copy ends up
        # calling this module's CreateProcess.
        _execute_child = replace_globals(subprocess.Popen._execute_child.im_func, {
            '_subprocess': FakeSubprocess(),
        })
        # Swap the byte-string shell template constant for a unicode one in
        # the copy (presumably so formatting a unicode command line with
        # shell=True does not go through a byte string -- confirm against the
        # subprocess source). This mutates the copy made above, not the
        # original subprocess.Popen method.
        replace_consts(_execute_child, {'{} /c "{}"': u'{} /c "{}"'})


    # Rebuild the convenience helpers from the stock implementations so that
    # they resolve ``Popen`` / ``call`` to the versions defined in this module.
    call = replace_globals(subprocess.call, {'Popen': Popen})
    check_call = replace_globals(subprocess.check_call, {'call': call})
    check_output = replace_globals(subprocess.check_output, {'Popen': Popen})
else:
    # Not Python 2 on Windows: re-export the stock subprocess API unchanged
    # so callers can import the same names from this module everywhere.
    Popen = subprocess.Popen
    call = subprocess.call
    check_call = subprocess.check_call
    check_output = subprocess.check_output

Parts of this code were based on this gist, but I fixed some of the bugs in the gist and provided a new Popen class so as not to accidentally break existing users of Popen.

I also took the opportunity of using a new idea I came up with to copy the existing function _execute_child, and change it to use the new CreateProcess function, instead of copying the entire method over.

Mind you, this makes the code dependent on exactly how subprocess is written. In general, this is a bad idea. However, since we are already messing with an undocumented method _execute_child anyway, it is not exactly worse. Python 2 is also a dead language, and there will be no significant changes in its last days.

I might blog about modifying existing Python functions, as done here, in another post.

Hopefully, you found this post useful, and learned something interesting!