<i18n dev> RFR: 8337077: Java uses wrong Charset in System.out when running on MINGW
Rostislav Krasny
rostiprodev at gmail.com
Fri Aug 23 22:56:17 UTC 2024
On Tue, 20 Aug 2024 at 19:34, Naoto Sato <naoto.sato at oracle.com> wrote:
> Hello,
>
Hello Naoto. Nice to meet you.
> As I commented in the bug report, I closed the issue as "not an issue",
> as LC_* environment values on Windows has never been supported (or even
> considered) as a means to set locale/encoding as the way POSIX does. It
> would create some inconsistent state between Windows' locale settings
> which may cause some unexpected behavior in applications.
>
We are talking not about the whole Windows but only about the MSYS2/MINGW
console that has its own support of UTF-8 that is enabled by default. We
also are talking about a console encoding exclusively and not about any
other locale parameter. The MSYS2/MINGW console is widely used by many
developers on Windows because of the Git popularity and the Git for Windows
project that comes with the MINGW64 variant of MSYS2, including its
UNIX-like console with UTF-8 enabled by default. Many software engineers
use that console not just for running Git commands but also for doing many
other things, including running Java console applications. There are some
developers who use the MSYS2 project directly and also may run Java
applications inside the MSYS2 console.
The LC_* environment variables are defined by MSYS2 only and not by
Windows. The same is also true about the OSTYPE and MSYSTEM environment
variables. The only problem using OSTYPE or MSYSTEM to identify MSYS2 in
Java could be if cmd.exe is started from the MSYS2 console. In this case
that cmd.exe process will inherit all environment variables, including the
above that were set by MSYS2 previously. But practically I don't see any
reason for anybody to run cmd.exe from MSYS2 console.
OK, but there is another, probably more reliable, way to identify that your
process is running under MSYS2. You can find your parent
process and check what DLLs it has loaded. If it has msys-2.0.dll loaded
then your process is running under MSYS2 and not under cmd.exe even if that
cmd.exe was started from MSYS2. Please take a look at the attached PoC code
made by ChatGPT.
Example of how this PoC works after it was built by Visual Studio 2022 MSVC:
$ ./stam01.exe
Parent Process ID: 1460
Parent Process Name: bash.exe
Parent Process Full Path: C:\Program Files\Git\usr\bin\bash.exe
Loaded DLLs in Parent Process (ID: 1460):
C:\Program Files\Git\usr\bin\bash.exe
C:\Windows\SYSTEM32\ntdll.dll
C:\Windows\System32\KERNEL32.DLL
C:\Windows\System32\KERNELBASE.dll
C:\Windows\System32\USER32.dll
C:\Windows\System32\win32u.dll
C:\Program Files\Git\usr\bin\msys-2.0.dll
C:\Windows\System32\GDI32.dll
C:\Windows\System32\gdi32full.dll
C:\Windows\System32\msvcp_win.dll
C:\Windows\System32\ucrtbase.dll
C:\Windows\System32\advapi32.dll
C:\Windows\System32\msvcrt.dll
C:\Windows\System32\sechost.dll
C:\Windows\System32\RPCRT4.dll
C:\Windows\System32\bcrypt.dll
C:\Windows\SYSTEM32\CRYPTBASE.DLL
C:\Windows\System32\bcryptPrimitives.dll
C:\Windows\System32\IMM32.DLL
Parent process has msys-2.0.dll loaded. Running under MSYS2.
D:\develop\cpp-tryouts\x64\Debug>stam01.exe
Parent Process ID: 13016
Parent Process Name: cmd.exe
Parent Process Full Path: C:\Windows\System32\cmd.exe
Loaded DLLs in Parent Process (ID: 13016):
C:\Windows\System32\cmd.exe
C:\Windows\SYSTEM32\ntdll.dll
C:\Windows\System32\KERNEL32.DLL
C:\Windows\System32\KERNELBASE.dll
C:\Windows\System32\msvcrt.dll
C:\Windows\System32\combase.dll
C:\Windows\System32\ucrtbase.dll
C:\Windows\System32\RPCRT4.dll
C:\Windows\SYSTEM32\winbrand.dll
C:\Windows\System32\bcrypt.dll
C:\Windows\System32\sechost.dll
Parent process does not have msys-2.0.dll loaded. Likely running under
native Windows.
Once you have found that your parent process has msys-2.0.dll loaded, you
can safely check the LC_* environment variables.
If the parent process doesn't have msys-2.0.dll loaded, continue to
identify the console encoding as you do it now.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mail.openjdk.org/pipermail/i18n-dev/attachments/20240824/c044f65d/attachment-0001.htm>
-------------- next part --------------
#include <windows.h>
#include <tlhelp32.h>
#include <psapi.h>
#include <tchar.h>
#include <stdio.h>
#include <string.h>
// Looks up the parent process ID of `processID` by scanning a Toolhelp
// snapshot of the processes currently in the system.
//
// Returns the th32ParentProcessID recorded for the matching entry, or 0 when
// the snapshot cannot be created or no entry with that process ID is found.
DWORD GetParentProcessID(DWORD processID) {
    DWORD result = 0;
    PROCESSENTRY32 entry;
    BOOL haveEntry;
    HANDLE snap = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);

    if (snap == INVALID_HANDLE_VALUE) {
        return 0;
    }

    // dwSize is filled in before the first Process32First call.
    entry.dwSize = sizeof(PROCESSENTRY32);

    for (haveEntry = Process32First(snap, &entry);
         haveEntry;
         haveEntry = Process32Next(snap, &entry)) {
        if (entry.th32ProcessID == processID) {
            result = entry.th32ParentProcessID;
            break;
        }
    }

    CloseHandle(snap);
    return result;
}
// Writes the full executable path of process `processID` into `processPath`
// (capacity `size`, in TCHARs).
//
// If the process cannot be opened, or the path query fails, the buffer is
// filled with the literal text "Unknown" instead.
void GetProcessFullPath(DWORD processID, TCHAR* processPath, DWORD size) {
    BOOL resolved = FALSE;
    HANDLE proc = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, processID);

    if (proc != NULL) {
        // A NULL module handle asks for the path of the process executable.
        resolved = (GetModuleFileNameEx(proc, NULL, processPath, size) != 0);
        CloseHandle(proc);
    }

    if (!resolved) {
        _tcsncpy_s(processPath, size, _T("Unknown"), _TRUNCATE);
    }
}
// Prints the full path of every module loaded in process `processID`,
// one per line, preceded by a header line.
//
// Prints a diagnostic and returns if the process cannot be opened or its
// modules cannot be enumerated. At most 1024 modules (the capacity of the
// local handle array) are listed.
void PrintLoadedModules(DWORD processID) {
    HMODULE hMods[1024];
    DWORD cbNeeded = 0;
    DWORD moduleCount;
    DWORD i;
    HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, processID);

    if (hProcess == NULL) {
        _tprintf(TEXT("Could not open process %lu.\n"), (unsigned long)processID);
        return;
    }

    if (EnumProcessModules(hProcess, hMods, sizeof(hMods), &cbNeeded)) {
        // cbNeeded is the number of bytes required for ALL module handles and
        // may be larger than the buffer we supplied; never index past hMods.
        if (cbNeeded > sizeof(hMods)) {
            cbNeeded = sizeof(hMods);
        }
        moduleCount = cbNeeded / sizeof(HMODULE);

        _tprintf(TEXT("Loaded DLLs in Parent Process (ID: %lu):\n"), (unsigned long)processID);
        for (i = 0; i < moduleCount; i++) {
            TCHAR szModName[MAX_PATH];
            if (GetModuleFileNameEx(hProcess, hMods[i], szModName, sizeof(szModName) / sizeof(TCHAR))) {
                _tprintf(TEXT("\t%s\n"), szModName);
            }
        }
    }
    else {
        _tprintf(TEXT("Could not enumerate modules for process %lu.\n"), (unsigned long)processID);
    }

    CloseHandle(hProcess);
}
// Reports whether process `processID` has a module whose file name is
// exactly "msys-2.0.dll" (compared case-insensitively).
//
// Returns TRUE if such a module is found; FALSE if it is not found, if the
// process cannot be opened, or if its modules cannot be enumerated. Only the
// first 1024 modules (the capacity of the local handle array) are examined.
BOOL IsMsys2DllLoaded(DWORD processID) {
    HMODULE hMods[1024];
    DWORD cbNeeded = 0;
    DWORD moduleCount;
    DWORD i;
    BOOL msys2Loaded = FALSE;
    HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, processID);

    if (hProcess == NULL) {
        return FALSE;
    }

    if (EnumProcessModules(hProcess, hMods, sizeof(hMods), &cbNeeded)) {
        // cbNeeded is the number of bytes required for ALL module handles and
        // may be larger than the buffer we supplied; never index past hMods.
        if (cbNeeded > sizeof(hMods)) {
            cbNeeded = sizeof(hMods);
        }
        moduleCount = cbNeeded / sizeof(HMODULE);

        for (i = 0; i < moduleCount; i++) {
            TCHAR szModName[MAX_PATH];
            const TCHAR *baseName;

            if (!GetModuleFileNameEx(hProcess, hMods[i], szModName, sizeof(szModName) / sizeof(TCHAR))) {
                continue;
            }

            // Compare only the file-name component so that a directory or an
            // unrelated DLL merely containing "msys-2.0.dll" does not match.
            baseName = _tcsrchr(szModName, _T('\\'));
            baseName = (baseName != NULL) ? baseName + 1 : szModName;

            if (_tcsicmp(baseName, _T("msys-2.0.dll")) == 0) {
                msys2Loaded = TRUE;
                break;
            }
        }
    }

    CloseHandle(hProcess);
    return msys2Loaded;
}
// Proof of concept: identifies the parent of the current process, prints its
// ID, name, full path and loaded modules, and reports whether msys-2.0.dll is
// among those modules.
//
// Returns 1 if the parent process ID cannot be determined, 0 otherwise.
int main(void) {
    DWORD currentProcessID = GetCurrentProcessId();
    DWORD parentProcessID = GetParentProcessID(currentProcessID);
    TCHAR parentProcessName[MAX_PATH] = TEXT("<unknown>");
    TCHAR parentProcessPath[MAX_PATH] = TEXT("<unknown>");
    const TCHAR *lastSeparator;

    if (parentProcessID == 0) {
        printf("Unable to find parent process.\n");
        return 1;
    }

    // Get the parent process full path
    GetProcessFullPath(parentProcessID, parentProcessPath, sizeof(parentProcessPath) / sizeof(TCHAR));

    // Extract the process name from the full path. The path may contain no
    // backslash at all (GetProcessFullPath stores "Unknown" on failure), in
    // which case the whole string is used as the name instead of
    // dereferencing a NULL-based pointer.
    lastSeparator = _tcsrchr(parentProcessPath, _T('\\'));
    _tcsncpy_s(parentProcessName, MAX_PATH,
               (lastSeparator != NULL) ? lastSeparator + 1 : parentProcessPath,
               _TRUNCATE);

    _tprintf(TEXT("Parent Process ID: %lu\n"), (unsigned long)parentProcessID);
    _tprintf(TEXT("Parent Process Name: %s\n"), parentProcessName);
    _tprintf(TEXT("Parent Process Full Path: %s\n"), parentProcessPath);

    // Print all loaded DLLs in the parent process
    PrintLoadedModules(parentProcessID);

    // Check if the parent process has msys-2.0.dll loaded
    if (IsMsys2DllLoaded(parentProcessID)) {
        printf("Parent process has msys-2.0.dll loaded. Running under MSYS2.\n");
    }
    else {
        printf("Parent process does not have msys-2.0.dll loaded. Likely running under native Windows.\n");
    }

    return 0;
}
More information about the i18n-dev
mailing list