Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve handling of newly created threads in initial seize code #255

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 91 additions & 13 deletions oi/OIDebugger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,26 +123,34 @@ bool OIDebugger::patchFunctions(void) {
* Single step an instruction in the target process 'pid' and leave the target
* thread stopped. Returns the current rip.
*/
uint64_t OIDebugger::singlestepInst(pid_t pid, struct user_regs_struct& regs) {
uint64_t OIDebugger::singleStepInst(pid_t pid, struct user_regs_struct& regs) {
int status = 0;

Metrics::Tracing _("single_step_inst");

errno = 0;
if (ptrace(PTRACE_SINGLESTEP, pid, 0, 0) == -1) {
LOG(ERROR) << "Error in singlestep!";
LOG(ERROR) << "singleStepInst: ptrace single error: " << strerror(errno);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this can be replaced with PLOG(ERROR) << "singleStepInst: ptrace single error: "; similarly with the other cases that append strerror.

return 0xdeadbeef;
}

/* Error check... */
waitpid(pid, &status, 0);
/*
* If the waitpid and ptrace() calls below fail then it's bad news but
* let's drive on regardless simply emitting messages as the alternative
* provides no better end result.
*/
if (waitpid(pid, &status, 0) == -1) {
LOG(ERROR) << "singleStepInst waitpid failed: " << strerror(errno);
}

if (!WIFSTOPPED(status)) {
LOG(ERROR) << "process not stopped!";
LOG(ERROR) << "singleStepInst process not stopped!";
}

errno = 0;
if (ptrace(PTRACE_GETREGS, pid, NULL, &regs) < 0) {
LOG(ERROR) << "Couldn't read registers: " << strerror(errno);
LOG(ERROR) << "singleStepInst: Couldn't read registers: "
<< strerror(errno);
}
VLOG(1) << "rip after singlestep: " << std::hex << regs.rip;

Expand Down Expand Up @@ -184,7 +192,7 @@ bool OIDebugger::singleStepFunc(pid_t pid, uint64_t real_end) {
prev = addr;
VLOG(1) << "singlestepFunc addr: " << std::hex << addr << " real_end "
<< real_end;
addr = singlestepInst(pid, regs);
addr = singleStepInst(pid, regs);
dumpRegs("singlestep", pid, &regs);
} while (addr != 0xdeadbeef && addr != real_end && prev != addr);

Expand Down Expand Up @@ -513,7 +521,7 @@ bool OIDebugger::replayTrappedInstr(const trapInfo& t,
LOG(ERROR) << "Execute: Couldn't restore fp registers: " << strerror(errno);
}

uintptr_t rip = singlestepInst(pid, regs);
uintptr_t rip = singleStepInst(pid, regs);

/*
* On entry to this function the current threads %rip is pointing straight
Expand Down Expand Up @@ -820,10 +828,14 @@ bool OIDebugger::processGlobal(const std::string& varName) {

VLOG(1) << "About to wait for process on syscall entry/exit";
int status = 0;
waitpid(traceePid, &status, 0);
if (waitpid(traceePid, &status, 0) == -1) {
LOG(ERROR) << "processGlobal waitpid failed: " << strerror(errno);
return false;
}

if (!WIFSTOPPED(status)) {
LOG(ERROR) << "process not stopped!";
LOG(ERROR) << "processGlobal process not stopped!";
return false;
}

errno = 0;
Expand Down Expand Up @@ -968,7 +980,7 @@ OIDebugger::processTrapRet OIDebugger::processTrap(pid_t pid,
siginfo_t info;

auto stopsig = WSTOPSIG(status);
VLOG(4) << "Stop sig: " << std::dec << stopsig;
VLOG(3) << "Stop sig: " << std::dec << stopsig;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if this was for debugging, but shouldn't this remain a 4?

if (ptrace(PTRACE_GETSIGINFO, newpid, nullptr, &info) < 0) {
LOG(ERROR) << "PTRACE_GETSIGINFO failed with " << strerror(errno);
} else {
Expand Down Expand Up @@ -1648,7 +1660,9 @@ std::optional<typename Sys::RetType> OIDebugger::remoteSyscall(Args... _args) {
}

int status = 0;
waitpid(traceePid, &status, 0);
if (waitpid(traceePid, &status, 0) == -1) {
LOG(ERROR) << "syscall execution waitpid failed: " << strerror(errno);
}

if (!WIFSTOPPED(status)) {
LOG(ERROR) << "process not stopped!";
Expand Down Expand Up @@ -2556,7 +2570,7 @@ bool OIDebugger::targetAttach() {
/*
* As mentioned above, this code is terribly racy and we need a better
* solution. If the seize operation fails we will carry on and assume
* that the thread has exited in between readinf the directory and
* that the thread has exited in between reading the directory and
* here (note: ptrace(2) overloads the ESRCH return but with a seize
* I think it can only mean one thing).
*/
Expand All @@ -2565,6 +2579,70 @@ bool OIDebugger::targetAttach() {
PTRACE_O_TRACEVFORK | PTRACE_O_TRACEEXIT) < 0) {
LOG(ERROR) << "Couldn't seize thread " << pid
<< " (Reason: " << strerror(errno) << ")";

/*
* An EPERM indicates that the thread can't be traced most
* likely owing to:
* - insufficient permissions
* - it is dying or dead.
* - it is already traced.
* Being already traced is a problem here. Consider the following
* scenario: if a thread exists as a result of a clone from a
* previously seized thread it is possible for it to sneak into our
* racy directory-iterator approach and we will attempt to seize it.
* It will be seized (i.e., parented by us) and stopped in a
* signal-delivery-stopped waiting for its tracer (us!) to continue
* it. The parent will also be stopped in a PTRACE_EVENT stop.
*
* Note that the follwing EPRM handling code is slightly noisy in
* terms of debug logging. These statements will be removed when
* confidence is higher.
*/
if (errno == EPERM && !threadList.empty()) {
VLOG(3) << "EPERM encountered in seize. Attempting a waitpid()"
<< std::endl;

int status = 0;
int ret = waitpid(pid, &status, WNOHANG);

if (ret == 0) {
/*
* A return of 0 from a WNOHANG waitpid() indicates that the
* thread exists but has not yet changed state. We'll try a
* continue request and add it to the managed thread list if
* that succeeds.
*/
VLOG(3) << "EPERM seize waitpid 0 attempting continue";
if (contTargetThread(pid)) {
threadList.push_back(pid);
} else {
VLOG(3) << "EPERM seize waitpid 0 continue failed";
}
} else if (ret > 0) {
/*
* This is a newly cloned thread so according to the man page
* it should be a regular signal-delivery-stop and not a
* PTRACE_EVENT stop.
*/
if (WIFSTOPPED(status)) {
VLOG(3) << "Process " << pid << " stopped with "
<< WIFSTOPPED(status);

if (!contTargetThread(pid)) {
VLOG(3) << "targetAttach failed to continue EPERM'd thread";
} else {
// Success! Let's consider this thread ours to own now.
threadList.push_back(pid);
}
} else {
// The thread must be stopped: this should be impossible.
std::cout << "targetAttach EPERM thread not stopped!"
<< std::endl;
}
} else {
VLOG(3) << "EPERM seize, waitpid failed. Ignoring thread.";
}
}
} else {
threadList.push_back(pid);
}
Expand Down
2 changes: 1 addition & 1 deletion oi/OIDebugger.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class OIDebugger {
OIDebugger::processTrapRet processTrap(pid_t, bool = true, bool = true);
bool contTargetThread(bool detach = true) const;
bool isGlobalDataProbeEnabled(void) const;
static uint64_t singlestepInst(pid_t, struct user_regs_struct&);
static uint64_t singleStepInst(pid_t, struct user_regs_struct&);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: should this return uintptr_t? it's casted to that at one of the call sites and seems to make more sense. obviously they're the same type though.

static bool singleStepFunc(pid_t, uint64_t);
bool parseScript(std::istream& script);
bool patchFunctions();
Expand Down