From 6baed3d43425da1633472a227251bdf9532cdda2 Mon Sep 17 00:00:00 2001 From: Fredrik Fornwall Date: Mon, 2 Oct 2023 12:33:11 +0200 Subject: [PATCH] Avoid executing ELF files directly --- .clang-tidy | 2 + .github/workflows/ci.yml | 28 ++ .gitignore | 3 + Makefile | 31 +- README.md | 68 +++- exec-variants.c | 146 ++++++++ termux-exec.c | 704 +++++++++++++++++++++++++++------------ test-program.c | 75 +++++ 8 files changed, 836 insertions(+), 221 deletions(-) create mode 100644 .clang-tidy create mode 100644 .github/workflows/ci.yml create mode 100644 exec-variants.c create mode 100644 test-program.c diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000..42ff3fe --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,2 @@ +Checks: '-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,-clang-analyzer-security.insecureAPI.strcpy,-clang-analyzer-valist.Uninitialized' + diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..fafe33a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,28 @@ +name: CI + +on: + push: + branches: + - '*' + pull_request: + +jobs: + check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: Homebrew/actions/setup-homebrew@master + - run: brew install clang-format + - run: make + - run: make check + - run: make unit-test + + actionlint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Download actionlint + id: get_actionlint + run: bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash) + - name: Check workflow files + run: ${{ steps.get_actionlint.outputs.executable }} -color diff --git a/.gitignore b/.gitignore index 1c90fce..1719acc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ *.so *.o *-actual +*.swo +*.swp +test-binary diff --git a/Makefile b/Makefile index fa5a3a0..863bc5b 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,14 @@ -TERMUX_PREFIX := 
/data/data/com.termux/files/usr TERMUX_BASE_DIR := /data/data/com.termux/files -CFLAGS += -Wall -Wextra -Werror -Oz +CFLAGS += -Wall -Wextra -Werror -Wshadow -O2 +C_SOURCE := termux-exec.c exec-variants.c +CLANG_FORMAT := clang-format --sort-includes --style="{ColumnLimit: 120}" $(C_SOURCE) +CLANG_TIDY := clang-tidy -libtermux-exec.so: termux-exec.c - $(CC) $(CFLAGS) $(LDFLAGS) termux-exec.c -DTERMUX_PREFIX=\"$(TERMUX_PREFIX)\" -DTERMUX_BASE_DIR=\"$(TERMUX_BASE_DIR)\" -shared -fPIC -o libtermux-exec.so +libtermux-exec.so: $(C_SOURCE) + $(CC) $(CFLAGS) $(LDFLAGS) $(C_SOURCE) -DTERMUX_PREFIX=\"$(TERMUX_PREFIX)\" -DTERMUX_BASE_DIR=\"$(TERMUX_BASE_DIR)\" -shared -fPIC -o libtermux-exec.so + +clean: + rm -f libtermux-exec.so tests/*-actual test-binary install: libtermux-exec.so install libtermux-exec.so $(DESTDIR)$(PREFIX)/lib/libtermux-exec.so @@ -11,10 +16,20 @@ install: libtermux-exec.so uninstall: rm -f $(DESTDIR)$(PREFIX)/lib/libtermux-exec.so -test: libtermux-exec.so +on-device-tests: libtermux-exec.so @LD_PRELOAD=${CURDIR}/libtermux-exec.so ./run-tests.sh -clean: - rm -f libtermux-exec.so tests/*-actual +format: + $(CLANG_FORMAT) -i $(C_SOURCE) + +check: + $(CLANG_FORMAT) --dry-run $(C_SOURCE) + $(CLANG_TIDY) -warnings-as-errors='*' $(C_SOURCE) -- + +test-binary: $(C_SOURCE) + $(CC) $(CFLAGS) $(LDFLAGS) $(C_SOURCE) -g -fsanitize=address -fno-omit-frame-pointer -DUNIT_TEST=1 -o test-binary + +unit-test: test-binary + ./test-binary -.PHONY: clean install test uninstall +.PHONY: clean install uninstall on-device-tests format check unit-test diff --git a/README.md b/README.md index ae7caa9..b299c59 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,66 @@ # termux-exec -A `execve()` wrapper to fix problem with shebangs when running in Termux. +A `execve()` wrapper to fix two problems with exec-ing files in Termux. 
-# Problem +# Problem 1: Cannot execute files not part of the APK +Android 10 started blocking executing files under the app data directory, as +that is a [W^X](https://en.wikipedia.org/wiki/W%5EX) violation - files should be either +writeable or executable, but not both. Resources: + +- [Google Android issue](https://issuetracker.google.com/issues/128554619) +- [Termux: No more exec from data folder on targetAPI >= Android Q](https://github.com/termux/termux-app/issues/1072) +- [Termux: Revisit the Android W^X problem](https://github.com/termux/termux-app/issues/2155) + +While there is merit in that general principle, this prevents using Termux and Android +as a general computing device, where it should be possible for users to create executable +scripts and binaries. + +# Solution 1: Execute files through the system linker +Create an `execve()` wrapper that, instead of exec-ing an ELF file directly, executes +`/system/bin/linker64 /path/to/elf`. Explanation follows below. + +On Linux, the kernel is normally responsible for loading both the executable and the +[dynamic linker](https://en.wikipedia.org/wiki/Dynamic_linker). The executable is invoked +by filename with `execve()`. The kernel loads the executable into the process, and looks +for a `PT_INTERP` entry in its ELF Program Headers; this specifies the filename of the +dynamic linker (`/system/bin/linker64` for 64-bit Android). This entry exists for +dynamically linked executables. + +There is another way to load the two ELF objects: the dynamic linker can be invoked directly +with `execve()`. If passed the filename of an executable, the dynamic linker will load the +executable itself. So, instead of executing `path/to/mybinary`, it's possible to execute +`/system/bin/linker64 /absolute/path/to/mybinary` (the linker needs an absolute path). + +This is what `termux-exec` does to circumvent the block on executing files in the data +directory - the kernel sees only `/system/bin/linker64` being executed. 
+ +This also means that we need to handle shebangs ourselves. So for example, a call to execute: + +```sh +./path/to/myscript.sh +``` + +where the script has a `#!/path/to/interpreter` shebang, is replaced with: + +```sh +/system/bin/linker64 /path/to/interpreter ./path/to/myscript.sh +``` + +Implications: + +- It's important that `LD_PRELOAD` is kept - see e.g. [this change in sshd](https://github.com/termux/termux-packages/pull/18069). +We could also consider patching this exec interception into the build process of termux packages, so `LD_PRELOAD` would not be necessary for packages built by the termux-packages repository. + +- The executable will be `/system/bin/linker64`. So programs that inspect the executable name (their own or that of other programs) need to be changed. See [this llvm driver change](https://github.com/termux/termux-packages/pull/18074) and [this pgrep/pkill change](https://github.com/termux/termux-packages/pull/18075). + +**NOTE**: The above example used `/system/bin/linker64` - on 32-bit systems, the corresponding +path is `/system/bin/linker`. + +**NOTE**: While this circumvents the technical restriction, it still might be considered +violating [Google Play policy](https://support.google.com/googleplay/android-developer/answer/9888379). +So this workaround is not guaranteed to enable Play store distribution of Termux - but it's +worth an attempt, and regardless of Play store distribution, updating the targetSdk is necessary. + +# Problem 2: Shebang paths A lot of Linux software is written with the assumption that `/bin/sh`, `/usr/bin/env` and similar file exists. This is not the case on Android where neither `/bin/` nor `/usr/` exists. @@ -9,7 +68,7 @@ exists. When building packages for Termux those hard-coded assumptions are patched away - but this does not help with installing scripts and programs from other sources than Termux packages. 
-# Solution +# Solution 2: Shebang paths Create an `execve()` wrapper that rewrites calls to execute files under `/bin/` and `/usr/bin` into the matching Termux executables under `$PREFIX/bin/` and inject that into processes using `LD_PRELOAD`. @@ -22,3 +81,6 @@ using `LD_PRELOAD`. # Where is LD_PRELOAD set? The `$PREFIX/bin/login` program which is used to create new Termux sessions checks for `$PREFIX/lib/libtermux-exec.so` and if so sets up `LD_PRELOAD` before launching the login shell. + +Soon, when making a switch to target Android 10+, this will be setup by the Termux app even before +launching any process, as `LD_PRELOAD` will be necessary for anything non-system to execute. diff --git a/exec-variants.c b/exec-variants.c new file mode 100644 index 0000000..217c92e --- /dev/null +++ b/exec-variants.c @@ -0,0 +1,146 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +enum { ExecL, ExecLE, ExecLP }; + +static int __exec_as_script(const char *buf, char *const *argv, char *const *envp) { + size_t arg_count = 1; + while (argv[arg_count] != NULL) + ++arg_count; + + const char *script_argv[arg_count + 2]; + script_argv[0] = "sh"; + script_argv[1] = buf; + memcpy(script_argv + 2, argv + 1, arg_count * sizeof(char *)); + return execve("/data/data/com.termux/files/usr/bin/sh", (char **const)script_argv, envp); +} + +int execv(const char *name, char *const *argv) { return execve(name, argv, environ); } + +int execvp(const char *name, char *const *argv) { return execvpe(name, argv, environ); } + +int execvpe(const char *name, char *const *argv, char *const *envp) { + // if (name == NULL || *name == '\0') { errno = ENOENT; return -1; } + + // If it's an absolute or relative path name, it's easy. + if (strchr(name, '/') && execve(name, argv, envp) == -1) { + if (errno == ENOEXEC) + return __exec_as_script(name, argv, envp); + return -1; + } + + // Get the path we're searching. 
+ const char *path = getenv("PATH"); + if (path == NULL) + path = _PATH_DEFPATH; + + // Make a writable copy. + size_t len = strlen(path) + 1; + char writable_path[len]; + memcpy(writable_path, path, len); + + bool saw_EACCES = false; + + // Try each element of $PATH in turn... + char *strsep_buf = writable_path; + const char *dir; + while ((dir = strsep(&strsep_buf, ":"))) { + // It's a shell path: double, leading and trailing colons + // mean the current directory. + if (*dir == '\0') + dir = "."; + + size_t dir_len = strlen(dir); + size_t name_len = strlen(name); + + char buf[dir_len + 1 + name_len + 1]; + mempcpy(mempcpy(mempcpy(buf, dir, dir_len), "/", 1), name, name_len + 1); + + execve(buf, argv, envp); + switch (errno) { + case EISDIR: + case ELOOP: + case ENAMETOOLONG: + case ENOENT: + case ENOTDIR: + break; + case ENOEXEC: + return __exec_as_script(buf, argv, envp); + return -1; + case EACCES: + saw_EACCES = true; + break; + default: + return -1; + } + } + if (saw_EACCES) + errno = EACCES; + return -1; +} + +static int __execl(int variant, const char *name, const char *argv0, va_list ap) { + // Count the arguments. + va_list count_ap; + va_copy(count_ap, ap); + size_t n = 1; + while (va_arg(count_ap, char *) != NULL) { + ++n; + } + va_end(count_ap); + + // Construct the new argv. + char *argv[n + 1]; + argv[0] = (char *)argv0; + n = 1; + while ((argv[n] = va_arg(ap, char *)) != NULL) { + ++n; + } + + // Collect the argp too. + char **argp = (variant == ExecLE) ? va_arg(ap, char **) : environ; + + va_end(ap); + + return (variant == ExecLP) ? execvp(name, argv) : execve(name, argv, argp); +} + +int execl(const char *name, const char *arg, ...) { + va_list ap; + va_start(ap, arg); + int result = __execl(ExecL, name, arg, ap); + va_end(ap); + return result; +} + +int execle(const char *name, const char *arg, ...) 
{ + va_list ap; + va_start(ap, arg); + int result = __execl(ExecLE, name, arg, ap); + va_end(ap); + return result; +} + +int execlp(const char *name, const char *arg, ...) { + va_list ap; + va_start(ap, arg); + int result = __execl(ExecLP, name, arg, ap); + va_end(ap); + return result; +} + +int fexecve(int fd, char *const *argv, char *const *envp) { + char buf[40]; + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", fd); + execve(buf, argv, envp); + if (errno == ENOENT) + errno = EBADF; + return -1; +} diff --git a/termux-exec.c b/termux-exec.c index 1f8037d..ac1e960 100644 --- a/termux-exec.c +++ b/termux-exec.c @@ -1,4 +1,11 @@ +// Make android_get_device_api_level() use an inline variant, +// as a libc symbol for it exists only in android-29+. +#undef __ANDROID_API__ +#define __ANDROID_API__ 28 + +#define _GNU_SOURCE #include +#include #include #include #include @@ -8,231 +15,508 @@ #include #include #include -#include + +#ifdef __ANDROID__ +#include +#endif #ifndef TERMUX_BASE_DIR -# define TERMUX_BASE_DIR "/data/data/com.termux/files" +#define TERMUX_BASE_DIR "/data/data/com.termux/files" #endif -#ifndef TERMUX_PREFIX -# define TERMUX_PREFIX "/data/data/com.termux/files/usr" +#define TERMUX_BIN_PATH TERMUX_BASE_DIR "/usr/bin/" + +#if UINTPTR_MAX == 0xffffffff +#define SYSTEM_LINKER_PATH "/system/bin/linker" +#elif UINTPTR_MAX == 0xffffffffffffffff +#define SYSTEM_LINKER_PATH "/system/bin/linker64" #endif #ifdef __aarch64__ -# define EM_NATIVE EM_AARCH64 +#define EM_NATIVE EM_AARCH64 #elif defined(__arm__) || defined(__thumb__) -# define EM_NATIVE EM_ARM +#define EM_NATIVE EM_ARM #elif defined(__x86_64__) -# define EM_NATIVE EM_X86_64 +#define EM_NATIVE EM_X86_64 #elif defined(__i386__) -# define EM_NATIVE EM_386 +#define EM_NATIVE EM_386 #else -# error "unknown arch" +#error "unknown arch" #endif -#define starts_with(value, str) !strncmp(value, str, sizeof(str) - 1) - -static const char* termux_rewrite_executable(const char* filename, char* buffer, int 
buffer_len) -{ - if (starts_with(filename, TERMUX_BASE_DIR) || - starts_with(filename, "/system/")) - return filename; - - strcpy(buffer, TERMUX_PREFIX "/bin/"); - char* bin_match = strstr(filename, "/bin/"); - if (bin_match == filename || bin_match == (filename + 4)) { - // We have either found "/bin/" at the start of the string or at - // "/xxx/bin/". Take the path after that. - strncpy(buffer + sizeof(TERMUX_PREFIX "/bin/") - 1, bin_match + 5, buffer_len - sizeof(TERMUX_PREFIX "/bin/")); - filename = buffer; - } - return filename; +// The implementation in bionic: +extern int __execve(const char *pathname, char *const *argv, char *const *envp); + +// Check if `string` starts with `prefix`. +static bool starts_with(const char *string, const char *prefix) { return strncmp(prefix, string, strlen(prefix)) == 0; } + +// Rewrite e.g. "/bin/sh" and "/usr/bin/sh" to TERMUX_BIN_PATH "sh"; other paths are returned unchanged. +static const char *termux_rewrite_executable(const char *executable_path, char *buffer, int buffer_len) { + if (executable_path[0] != '/') { + return executable_path; + } + + char *bin_match = strstr(executable_path, "/bin/"); + if (bin_match == executable_path || bin_match == (executable_path + 4)) { + // Found "/bin/" or "/xxx/bin/" at the start of executable_path. + strcpy(buffer, TERMUX_BIN_PATH); + char *dest = buffer + sizeof(TERMUX_BIN_PATH) - 1; + // Copy what comes after "/bin/", truncating if needed but always NUL-terminating: + const char *src = bin_match + 5; + size_t bytes_to_copy = buffer_len - sizeof(TERMUX_BIN_PATH); + snprintf(dest, bytes_to_copy + 1, "%s", src); + executable_path = buffer; + } + + return executable_path; +} + +// Remove LD_LIBRARY_PATH and LD_PRELOAD entries from envp. 
+static void remove_ld_from_env(char *const *envp, char ***allocation) { + bool create_new_env = false; + int env_length = 0; + while (envp[env_length] != NULL) { + if (starts_with(envp[env_length], "LD_LIBRARY_PATH=") || starts_with(envp[env_length], "LD_PRELOAD=")) { + create_new_env = true; + } + env_length++; + } + + if (!create_new_env) { + return; + } + + char **new_envp = malloc(sizeof(char *) * env_length); + *allocation = new_envp; + int new_envp_idx = 0; + int old_envp_idx = 0; + + while (old_envp_idx < env_length) { + if (!starts_with(envp[old_envp_idx], "LD_LIBRARY_PATH=") && !starts_with(envp[old_envp_idx], "LD_PRELOAD=")) { + new_envp[new_envp_idx++] = envp[old_envp_idx]; + } + old_envp_idx++; + } + + new_envp[new_envp_idx] = NULL; } -static char*const * remove_ld_preload(char*const * envp) -{ - for (int i = 0; envp[i] != NULL; i++) { - if (strstr(envp[i], "LD_PRELOAD=") == envp[i]) { - int env_length = 0; - while (envp[env_length] != NULL) env_length++; - - char** new_envp = malloc(sizeof(char*) * env_length); - int new_envp_idx = 0; - int old_envp_idx = 0; - while (old_envp_idx < env_length) { - if (old_envp_idx != i) { - new_envp[new_envp_idx++] = envp[old_envp_idx]; - } - old_envp_idx++; - } - new_envp[env_length] = NULL; - return new_envp; - } - } - return envp; +// From https://stackoverflow.com/questions/4774116/realpath-without-resolving-symlinks/34202207#34202207 +static const char *normalize_path(const char *src, char *result_buffer) { + char pwd[PATH_MAX]; + if (getcwd(pwd, sizeof(pwd)) == NULL) { + return src; + } + + size_t res_len; + size_t src_len = strlen(src); + + const char *ptr = src; + const char *end = &src[src_len]; + const char *next; + + if (src_len == 0 || src[0] != '/') { + // relative path + size_t pwd_len = strlen(pwd); + memcpy(result_buffer, pwd, pwd_len); + res_len = pwd_len; + } else { + res_len = 0; + } + + for (ptr = src; ptr < end; ptr = next + 1) { + next = (char *)memchr(ptr, '/', end - ptr); + if (next == NULL) { 
+ next = end; + } + size_t len = next - ptr; + switch (len) { + case 2: + if (ptr[0] == '.' && ptr[1] == '.') { + const char *slash = (char *)memrchr(result_buffer, '/', res_len); + if (slash != NULL) { + res_len = slash - result_buffer; + } + continue; + } + break; + case 1: + if (ptr[0] == '.') { + continue; + } + break; + case 0: + continue; + } + + if (res_len != 1) { + result_buffer[res_len++] = '/'; + } + + memcpy(&result_buffer[res_len], ptr, len); + res_len += len; + } + + if (res_len == 0) { + result_buffer[res_len++] = '/'; + } + result_buffer[res_len] = '\0'; + return result_buffer; } -int execve(const char* filename, char* const* argv, char* const* envp) -{ - bool android_10_debug = getenv("TERMUX_ANDROID10_DEBUG") != NULL; - if (android_10_debug) { - printf("execve(%s):\n", filename); - int tmp_argv_count = 0; - while (argv[tmp_argv_count] != NULL) { - printf(" %s\n", argv[tmp_argv_count]); - tmp_argv_count++; - } - } - - int fd = -1; - const char** new_argv = NULL; - const char** new_envp = NULL; - - char filename_buffer[512]; - filename = termux_rewrite_executable(filename, filename_buffer, sizeof(filename_buffer)); - - // Error out if the file is not executable: - if (access(filename, X_OK) != 0) goto final; - - fd = open(filename, O_RDONLY); - if (fd == -1) goto final; - - // LD_LIBRARY_PATH messes up system programs with CANNOT_LINK_EXECUTABLE errors. - // If we remove.it, this problem is solved. - // /system/bin/sh is fine, it only uses libc++, libc, and libdl. 
- if (starts_with(filename, "/system/") && strcmp(filename, "/system/bin/sh") != 0) { - - size_t envp_count = 0; - while (envp[envp_count] != NULL) - envp_count++; - - new_envp = malloc((envp_count + 1) * sizeof(char*)); - - size_t pos = 0; - for (size_t i = 0; i < envp_count; i++) { - // Skip it if it is LD_LIBRARY_PATH or LD_PRELOAD - if (!starts_with(envp[i], "LD_LIBRARY_PATH=") && - !starts_with(envp[i], "LD_PRELOAD=")) - new_envp[pos++] = (const char*)envp[i]; - } - new_envp[pos] = NULL; - - envp = (char**)new_envp; - // Not.sure if needed. - environ = (char**)new_envp; - } - - // execve(2): "A maximum line length of 127 characters is allowed - // for the first line in a #! executable shell script." - char header[128]; - ssize_t read_bytes = read(fd, header, sizeof(header) - 1); - - // If we are executing a non-native ELF file, unset LD_PRELOAD. - // This avoids CANNOT LINK EXECUTABLE errors when running 32-bit code - // on 64-bit. - if (read_bytes >= 20 && !memcmp(header, ELFMAG, SELFMAG)) { - Elf32_Ehdr* ehdr = (Elf32_Ehdr*)header; - if (ehdr->e_machine != EM_NATIVE) { - envp = remove_ld_preload(envp); - } - goto final; - } - if (read_bytes < 5 || !(header[0] == '#' && header[1] == '!')) goto final; - - header[read_bytes] = 0; - char* newline_location = strchr(header, '\n'); - if (newline_location == NULL) goto final; - - // Strip whitespace at end of shebang: - while (*(newline_location - 1) == ' ') newline_location--; - - // Null-terminate the shebang line: - *newline_location = 0; - - // Skip whitespace to find interpreter start: - char* interpreter = header + 2; - while (*interpreter == ' ') interpreter++; - if (interpreter == newline_location) goto final; - - char* arg = NULL; - char* whitespace_pos = strchr(interpreter, ' '); - if (whitespace_pos != NULL) { - // Null-terminate the interpreter string. 
- *whitespace_pos = 0; - - // Find start of argument: - arg = whitespace_pos + 1; - while (*arg != 0 && *arg == ' ') arg++; - if (arg == newline_location) { - // Only whitespace after interpreter. - arg = NULL; - } - } - - char interp_buf[512]; - const char* new_interpreter = termux_rewrite_executable(interpreter, interp_buf, sizeof(interp_buf)); - if (new_interpreter == interpreter) goto final; - - int orig_argv_count = 0; - while (argv[orig_argv_count] != NULL) orig_argv_count++; - - new_argv = malloc(sizeof(char*) * (4 + orig_argv_count)); - - int current_argc = 0; - new_argv[current_argc++] = basename(interpreter); - if (arg) new_argv[current_argc++] = arg; - new_argv[current_argc++] = filename; - int i = 1; - while (orig_argv_count-- > 1) new_argv[current_argc++] = argv[i++]; - new_argv[current_argc] = NULL; - - filename = new_interpreter; - argv = (char**) new_argv; - -final: - if (fd != -1) close(fd); - int (*real_execve)(const char*, char* const[], char* const[]) = dlsym(RTLD_NEXT, "execve"); - - bool android_10_wrapping = getenv("TERMUX_ANDROID10") != NULL; - if (android_10_wrapping) { - char realpath_buffer[PATH_MAX]; - bool realpath_ok = realpath(filename, realpath_buffer) != NULL; - if (realpath_ok) { - bool wrap_in_proot = (strstr(realpath_buffer, TERMUX_BASE_DIR) != NULL); - if (android_10_debug) { - printf("termux-exec: realpath(\"%s\") = \"%s\", wrapping=%s\n", filename, realpath_buffer, wrap_in_proot ? 
"yes" : "no"); - } - if (wrap_in_proot) { - orig_argv_count = 0; - while (argv[orig_argv_count] != NULL) orig_argv_count++; - - new_argv = malloc(sizeof(char*) * (2 + orig_argv_count)); - filename = TERMUX_PREFIX "/bin/proot"; - new_argv[0] = "proot"; - for (int i = 0; i < orig_argv_count; i++) { - new_argv[i + 1] = argv[i]; - } - new_argv[orig_argv_count + 1] = NULL; - argv = (char**) new_argv; - // Remove LD_PRELOAD environment variable when wrapping in proot - envp = remove_ld_preload(envp); - } - } else { - errno = 0; - } - - if (android_10_debug) { - printf("real_execve(%s):\n", filename); - int tmp_argv_count = 0; - while (argv[tmp_argv_count] != NULL) { - printf(" %s\n", argv[tmp_argv_count]); - tmp_argv_count++; - } - } - } - - int ret = real_execve(filename, argv, envp); - free(new_argv); - free(new_envp); - return ret; +struct file_header_info { + bool is_elf; + // If executing a 32-bit binary on a 64-bit host: + bool is_non_native_elf; + char interpreter_buf[256]; + char const *interpreter; + char const *interpreter_arg; +}; + +static void inspect_file_header(char *header, size_t header_len, struct file_header_info *result) { + if (header_len >= 20 && !memcmp(header, ELFMAG, SELFMAG)) { + result->is_elf = true; + Elf32_Ehdr *ehdr = (Elf32_Ehdr *)header; + if (ehdr->e_machine != EM_NATIVE) { + result->is_non_native_elf = true; + } + return; + } + + if (header_len < 5 || !(header[0] == '#' && header[1] == '!')) { + return; + } + + // Check if the header contains a newline to end the shebang line: + char *newline_location = memchr(header, '\n', header_len); + if (newline_location == NULL) { + return; + } + + // Strip whitespace at end of shebang: + while (*(newline_location - 1) == ' ') { + newline_location--; + } + + // Null terminate the shebang line: + *newline_location = 0; + + // Skip whitespace to find interpreter start: + char const *interpreter = header + 2; + while (*interpreter == ' ') { + interpreter++; + } + if (interpreter == newline_location) 
{ + // Just a blank line up until the newline. + return; + } + + // Check for whitespace following the interpreter: + char *whitespace_pos = strchr(interpreter, ' '); + if (whitespace_pos != NULL) { + // Null-terminate the interpreter string. + *whitespace_pos = 0; + + // Find start of argument: + char *interpreter_arg = whitespace_pos + 1; + while (*interpreter_arg != 0 && *interpreter_arg == ' ') { + interpreter_arg++; + } + if (interpreter_arg != newline_location) { + result->interpreter_arg = interpreter_arg; + } + } + + result->interpreter = + termux_rewrite_executable(interpreter, result->interpreter_buf, sizeof(result->interpreter_buf)); } + +// Interceptor of the execve(2) system call using LD_PRELOAD. +int execve(const char *executable_path, char *const argv[], char *const envp[]) { + const bool termux_exec_debug = getenv("TERMUX_EXEC_DEBUG") != NULL; + if (termux_exec_debug) { + fprintf(stderr, "[termux-exec] Intercepting execve('%s'):\n", executable_path); + int tmp_argv_count = 0; + while (argv[tmp_argv_count] != NULL) { + fprintf(stderr, "[termux-exec] arg[%d] = %s\n", tmp_argv_count, argv[tmp_argv_count]); + tmp_argv_count++; + } + } + + const char *orig_executable_path = executable_path; + + char executable_path_buffer[PATH_MAX]; + executable_path = termux_rewrite_executable(executable_path, executable_path_buffer, sizeof(executable_path_buffer)); + if (termux_exec_debug) { + fprintf(stderr, "[termux-exec] Possibly rewritten path: '%s'\n", executable_path); + } + + if (access(executable_path, X_OK) != 0) { + // Error out if the file is not executable: + // Keep the errno set by access() (ENOENT, EACCES, ...); execvpe() relies on it when searching PATH. + return -1; + } + + int fd = open(executable_path, O_RDONLY); + if (fd == -1) { + // Keep the errno set by open() instead of masking every failure as ENOENT. + return -1; + } + + // execve(2): "The kernel imposes a maximum length on the text that follows the "#!" characters + // at the start of a script; characters beyond the limit are ignored. Before Linux 5.1, the + // limit is 127 characters. Since Linux 5.1, the limit is 255 characters." 
+ // We use one more byte since inspect_file_header() will null terminate the buffer. + char header[256]; + ssize_t read_bytes = read(fd, header, sizeof(header) - 1); + close(fd); + + struct file_header_info info = { + .interpreter = NULL, + .interpreter_arg = NULL, + }; + inspect_file_header(header, read_bytes > 0 ? (size_t)read_bytes : 0, &info); + + if (!info.is_elf && info.interpreter == NULL) { + errno = ENOEXEC; + return -1; + } + + if (info.interpreter != NULL) { + executable_path = info.interpreter; + } + + // Resolve the real path of the binary to execute so we can tell if it lives under TERMUX_BASE_DIR. + char realpath_buffer[PATH_MAX]; + bool realpath_ok = realpath(executable_path, realpath_buffer) != NULL; + + char normalized_path_buffer[PATH_MAX]; + + char **new_allocated_envp = NULL; + + const char **new_argv = NULL; + if (realpath_ok) { + bool wrap_in_linker = +#ifdef __ANDROID__ + android_get_device_api_level() >= 29 && +#endif + (strstr(realpath_buffer, TERMUX_BASE_DIR) != NULL); + + if (termux_exec_debug) { + fprintf(stderr, "[termux-exec] realpath(\"%s\") = \"%s\", wrapping=%s\n", executable_path, realpath_buffer, + wrap_in_linker ? "yes" : "no"); + } + + bool cleanup_env = info.is_non_native_elf || + (starts_with(executable_path, "/system/") && !starts_with(executable_path, "/system/bin/sh")); + if (cleanup_env) { + remove_ld_from_env(envp, &new_allocated_envp); + if (new_allocated_envp) { + envp = new_allocated_envp; + } + } + + const bool argv_needs_rewriting = wrap_in_linker || info.interpreter != NULL; + if (argv_needs_rewriting) { + int orig_argv_count = 0; + while (argv[orig_argv_count] != NULL) { + orig_argv_count++; + } + // Worst case: argv[0], linker target, shebang argument, script path and the terminating NULL. + new_argv = malloc(sizeof(char *) * (5 + orig_argv_count)); + int current_argc = 0; + + // Keep program name: + new_argv[current_argc++] = argv[0]; + + // Specify executable path if wrapping with linker: + if (wrap_in_linker) { + // Normalize path without resolving symlink. 
For instance, $PREFIX/bin/ls is + // a symlink to $PREFIX/bin/coreutils, but we need to execute + // "/system/bin/linker $PREFIX/bin/ls" so that coreutils knows what to execute. + new_argv[current_argc++] = normalize_path(executable_path, normalized_path_buffer); + executable_path = SYSTEM_LINKER_PATH; + } + + // Add interpreter argument and script path if exec:ing a script with shebang: + if (info.interpreter != NULL) { + if (info.interpreter_arg) { + new_argv[current_argc++] = info.interpreter_arg; + } + new_argv[current_argc++] = orig_executable_path; + } + + for (int i = 1; i < orig_argv_count; i++) { + new_argv[current_argc++] = argv[i]; + } + new_argv[current_argc] = NULL; + argv = (char **)new_argv; + } + } else { + if (termux_exec_debug) { + perror("[termux-exec] realpath failed"); + } + errno = 0; + } + + if (termux_exec_debug) { + fprintf(stderr, "[termux-exec] Calling real execve('%s'):\n", executable_path); + int tmp_argv_count = 0; + int arg_count = 0; + while (argv[tmp_argv_count] != NULL) { + fprintf(stderr, "[termux-exec] arg[%d] = %s\n", arg_count++, argv[tmp_argv_count]); + tmp_argv_count++; + } + } + + int (*real_execve)(const char *, char *const[], char *const[]) = dlsym(RTLD_NEXT, "execve"); + int real_execve_return_value = real_execve(executable_path, argv, envp); + // int real_execve_return_value = __execve(executable_path, argv, envp); + int saved_errno = errno; + free(new_argv); + free(new_allocated_envp); + errno = saved_errno; + return real_execve_return_value; +} + +#ifdef UNIT_TEST +#include + +void assert_string_equals(const char *expected, const char *actual) { + if (strcmp(actual, expected) != 0) { + fprintf(stderr, "Assertion failed - expected '%s', was '%s'\n", expected, actual); + } +} + +void test_starts_with() { + assert(starts_with("/path/to/file", "/path")); + assert(!starts_with("/path", "/path/to/file")); +} + +void test_termux_rewrite_executable() { + char buf[PATH_MAX]; + assert_string_equals(TERMUX_BIN_PATH "sh", 
termux_rewrite_executable("/bin/sh", buf, PATH_MAX)); + assert_string_equals(TERMUX_BIN_PATH "sh", termux_rewrite_executable("/usr/bin/sh", buf, PATH_MAX)); + assert_string_equals("/system/bin/sh", termux_rewrite_executable("/system/bin/sh", buf, PATH_MAX)); + assert_string_equals("/system/bin/tool", termux_rewrite_executable("/system/bin/tool", buf, PATH_MAX)); + assert_string_equals(TERMUX_BIN_PATH "sh", termux_rewrite_executable(TERMUX_BIN_PATH "sh", buf, PATH_MAX)); + assert_string_equals(TERMUX_BIN_PATH, termux_rewrite_executable("/bin/", buf, PATH_MAX)); + assert_string_equals("./ab/sh", termux_rewrite_executable("./ab/sh", buf, PATH_MAX)); +} + +void test_remove_ld_from_env() { + { + char *test_env[] = {"MY_ENV=1", NULL}; + char **allocated_envp = NULL; + remove_ld_from_env(test_env, &allocated_envp); + assert(allocated_envp == NULL); + assert_string_equals("MY_ENV=1", test_env[0]); + assert(test_env[1] == NULL); + } + { + char *test_env[] = {"MY_ENV=1", "LD_PRELOAD=a", NULL}; + char **allocated_envp = NULL; + remove_ld_from_env(test_env, &allocated_envp); + assert(allocated_envp != NULL); + assert_string_equals("MY_ENV=1", allocated_envp[0]); + assert(allocated_envp[1] == NULL); + free(allocated_envp); + } + { + char *test_env[] = {"MY_ENV=1", "LD_PRELOAD=a", "A=B", "LD_LIBRARY_PATH=B", "B=C", NULL}; + char **allocated_envp = NULL; + remove_ld_from_env(test_env, &allocated_envp); + assert(allocated_envp != NULL); + assert_string_equals("MY_ENV=1", allocated_envp[0]); + assert_string_equals("A=B", allocated_envp[1]); + assert_string_equals("B=C", allocated_envp[2]); + assert(allocated_envp[3] == NULL); + free(allocated_envp); + } +} + +void test_normalize_path() { + char expected[PATH_MAX * 2]; + char pwd[PATH_MAX]; + char normalized_path_buffer[PATH_MAX]; + + assert(getcwd(pwd, sizeof(pwd)) != NULL); + + sprintf(expected, "%s/path/to/binary", pwd); + assert_string_equals(expected, normalize_path("path/to/binary", normalized_path_buffer)); + assert_string_equals(expected, 
normalize_path("path/../path/to/binary", normalized_path_buffer)); + assert_string_equals(expected, normalize_path("./path/to/../to/binary", normalized_path_buffer)); + assert_string_equals( + "/usr/bin/sh", normalize_path("../../../../../../../../../../../../usr/./bin/../bin/sh", normalized_path_buffer)); +} + +void test_inspect_file_header() { + char header[256]; + struct file_header_info info = {.interpreter_arg = NULL}; + + sprintf(header, "#!/bin/sh\n"); + inspect_file_header(header, 256, &info); + assert(!info.is_elf); + assert(!info.is_non_native_elf); + assert_string_equals(TERMUX_BIN_PATH "sh", info.interpreter); + assert(info.interpreter_arg == NULL); + + sprintf(header, "#!/bin/sh -x\n"); + inspect_file_header(header, 256, &info); + assert(!info.is_elf); + assert(!info.is_non_native_elf); + assert_string_equals(TERMUX_BIN_PATH "sh", info.interpreter); + assert_string_equals("-x", info.interpreter_arg); + + sprintf(header, "#!/bin/sh -x \n"); + inspect_file_header(header, 256, &info); + assert(!info.is_elf); + assert(!info.is_non_native_elf); + assert_string_equals(TERMUX_BIN_PATH "sh", info.interpreter); + assert_string_equals("-x", info.interpreter_arg); + + sprintf(header, "#!/bin/sh -x \n"); + inspect_file_header(header, 256, &info); + assert(!info.is_elf); + assert(!info.is_non_native_elf); + assert_string_equals(TERMUX_BIN_PATH "sh", info.interpreter); + assert_string_equals("-x", info.interpreter_arg); + + info.interpreter = NULL; + info.interpreter_arg = NULL; + // An ELF header for a 32-bit file. 
+ // See https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header + sprintf(header, "\177ELF"); + // Native instruction set: + header[0x12] = EM_NATIVE; + header[0x13] = 0; + inspect_file_header(header, 256, &info); + assert(info.is_elf); + assert(!info.is_non_native_elf); + assert(info.interpreter == NULL); + assert(info.interpreter_arg == NULL); + + info.interpreter = NULL; + info.interpreter_arg = NULL; + // An ELF header for a 64-bit file. + // See https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header + sprintf(header, "\177ELF"); + // 'Fujitsu MMA Multimedia Accelerator' instruction set - likely non-native. + header[0x12] = 0x36; + header[0x13] = 0; + inspect_file_header(header, 256, &info); + assert(info.is_elf); + assert(info.is_non_native_elf); + assert(info.interpreter == NULL); + assert(info.interpreter_arg == NULL); +} + +int main() { + test_starts_with(); + test_termux_rewrite_executable(); + test_remove_ld_from_env(); + test_normalize_path(); + test_inspect_file_header(); + return 0; +} +#endif diff --git a/test-program.c b/test-program.c new file mode 100644 index 0000000..a7f559f --- /dev/null +++ b/test-program.c @@ -0,0 +1,75 @@ +#include +#include +#include +#include + +#define TERMUX_APT_PATH "/data/data/com.termux/files/usr/bin/apt" + +extern char **environ; + +int main() { + if (fork() == 0) { + char *const argv[] = { "apt", "--version", NULL }; + printf("# execve\n"); + execve(TERMUX_APT_PATH, argv, environ); + return 0; + } + + sleep(1); + if (fork() == 0) { + printf("# execl\n"); + execl(TERMUX_APT_PATH, "apt", "--version", NULL); + return 0; + } + + sleep(1); + if (fork() == 0) { + printf("# execlp\n"); + execlp("apt", "apt", "--version", NULL); + return 0; + } + + sleep(1); + if (fork() == 0) { + printf("# execle\n"); + execle(TERMUX_APT_PATH, "apt", "--version", NULL, environ); + return 0; + } + + sleep(1); + if (fork() == 0) { + char *const argv[] = { "apt", "--version", NULL }; + printf("# execv\n"); + 
execv(TERMUX_APT_PATH, argv); + return 0; + } + + sleep(1); + if (fork() == 0) { + char *const argv[] = { "apt", "--version", NULL }; + printf("# execvp\n"); + execvp("apt", argv); + return 0; + } + + sleep(1); + if (fork() == 0) { + char *const argv[] = { "apt", "--version", NULL }; + printf("# execvpe\n"); + execvpe("apt", argv, environ); + return 0; + } + + sleep(1); + if (fork() == 0) { + char *const argv[] = { "apt", "--version", NULL }; + int fd = open(TERMUX_APT_PATH, 0); + printf("# fexecve\n"); + fexecve(fd, argv, environ); + return 0; + } + + sleep(1); + + return 0; +}