From 3b41292263abfb746f41223d57969025997da6b7 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Fri, 19 Mar 2021 17:41:05 +0800 Subject: [PATCH] Parse musl libc path from header This is probably slightly faster and more robust than the ldd method since it does not require ldd on PATH, and avoids some terminal encoding issues. The ldd approach is kept in edge cases where the executable is somehow not readable. I suspect ldd would not be very useful in this scenario either, but there's no harm in being safe? --- packaging/_musllinux.py | 78 +++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 11 deletions(-) diff --git a/packaging/_musllinux.py b/packaging/_musllinux.py index 82ada871..aaf32499 100644 --- a/packaging/_musllinux.py +++ b/packaging/_musllinux.py @@ -5,19 +5,63 @@ """ import functools +import operator +import os import re import shutil +import struct import subprocess import sys -from typing import Iterator, NamedTuple, Optional +from typing import IO, Iterator, NamedTuple, Optional, Tuple -class _MuslVersion(NamedTuple): - major: int - minor: int +def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, f.read(struct.calcsize(fmt))) -def _get_ld_musl(executable: str) -> Optional[str]: +def _get_ld_musl_ctypes(f: IO[bytes]) -> Optional[str]: + """Detect musl libc location by parsing the Python executable. + + Based on https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca + """ + f.seek(0) + try: + ident = _read_unpacked(f, "16B") + except struct.error: + return None + if ident[:4] != tuple(b"\x7fELF"): # Invalid magic, not ELF. + return None + f.seek(struct.calcsize("HHI"), 1) # Skip file type, machine, and version. + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, p_fmt, p_idx = { + 1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)), # 32-bit. + 2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)), # 64-bit. 
+ }[ident[4]] + except KeyError: + return None + else: + p_get = operator.itemgetter(*p_idx) + + # Find the interpreter section and return its content. + _, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt) + for i in range(e_phnum + 1): + f.seek(e_phoff + e_phentsize * i) + p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt)) + if p_type != 3: + continue + f.seek(p_offset) + interpreter = os.fsdecode(f.read(p_filesz)).strip("\0") + if "musl" not in interpreter: + return None + return interpreter + return None + + +def _get_ld_musl_ldd(executable: str) -> Optional[str]: ldd = shutil.which("ldd") if not ldd: # No dynamic program loader. return None @@ -28,18 +72,30 @@ def _get_ld_musl(executable: str) -> Optional[str]: ) if proc.returncode != 0: # Not a valid dynamic program. return None - ld_musl_pat = re.compile(r"^.+/ld-musl-.+$") - for line in proc.stdout.splitlines(): - m = ld_musl_pat.match(line) - if not m: + for line in proc.stdout.splitlines(keepends=False): + path = line.lstrip().rsplit(None, 1)[0] + if "musl" not in path: continue - return m.string.strip().rsplit(None, 1)[0] - return None # Musl ldd path not found -- program not linked against musl. + return path + return None + + +def _get_ld_musl(executable: str) -> Optional[str]: + try: + with open(executable, "rb") as f: + return _get_ld_musl_ctypes(f) + except IOError: + return _get_ld_musl_ldd(executable) _version_pat = re.compile(r"^Version (\d+)\.(\d+)", flags=re.MULTILINE) +class _MuslVersion(NamedTuple): + major: int + minor: int + + @functools.lru_cache() def _get_musl_version(executable: str) -> Optional[_MuslVersion]: """Detect currently-running musl runtime version.