如何使用 ptrace 获得多个线程的一致视图？ [英] How to use PTRACE to get a consistent view of multiple threads?

查看：910 发布时间：2016/8/18 12:50:23 c linux multithreading pthreads ptrace

本文介绍了如何使用ptrace函数来获得多线程的一致看法？的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

在研究这个问题时，我想到了一个使用 ptrace 的可能思路，但我无法正确理解 ptrace 是如何与线程交互的。

假设我有一个给定的多线程主进程，我想（也许是从 fork 出来的子进程中）附加到其中某个特定的线程。

我可以附加到某个特定的线程吗？（各手册在这个问题上说法不一。）

如果可以，这是否意味着单步执行只会执行那一个线程的指令？它会停止该进程的所有线程吗？

如果是这样，当我调用 PTRACE_SYSCALL 或 PTRACE_SINGLESTEP 时，其他所有线程是保持停止，还是所有线程都会继续运行？有没有办法只让某一个线程向前执行，同时保证其他线程保持停止状态？

基本上，我想通过强制所有线程停止来对原程序进行同步，然后仅通过单步执行那一个被跟踪的线程，来执行一小段单线程指令。

我个人的努力，到目前为止看起来有点像这样：

pid_t target = syscall(SYS_gettid);   // 获取调用线程的 ID
pid_t pid = fork();

if (pid > 0)
{
    waitpid(pid, NULL, 0);            // 同步主进程

    important_instruction();
}
else if (pid == 0)
{
    ptrace(PTRACE_ATTACH, target, NULL, NULL);    // 这样可行吗？

    // 取消父进程的 "waitpid" 调用，例如使用信号

    // 单步执行上面的 "important_instruction()"

    ptrace(PTRACE_DETACH, target, NULL, NULL);    // 父进程的线程恢复运行？

    _Exit(0);
}

不过，我不确定，也找不到合适的参考资料来确认：这种做法在并发上是正确的，并且 important_instruction() 保证只在其他所有线程都停止时才执行。我也明白，当父进程从别处收到信号时可能存在竞争条件；我还听说应该改用 PTRACE_SEIZE，但它似乎并非在所有系统上都存在。

任何澄清或参考资料都将不胜感激！

解决方案

我写了第二个测试用例。由于它太长，无法连同示例输出一起放进第一个回答，所以我不得不另外添加一个回答。

首先，这里是 tracer.c ：

 的#include＆LT;＆unistd.h中GT;
＃包括LT＆;＆stdlib.h中GT;
＃包括LT＆; SYS / types.h中＆GT;
＃包括LT＆; SYS / ptrace.h＆GT;
＃包括LT＆; SYS / prctl.h＆GT;
＃包括LT＆; SYS / wait.h＆GT;
＃包括LT＆; SYS / user.h＆GT;
＃包括LT＆;＆dirent.h GT;
＃包括LT＆;＆string.h中GT;
＃包括LT＆;＆signal.h中GT;
＃包括LT＆;＆errno.h中GT;
＃包括LT＆;＆stdio.h中GT;
的#ifndef SINGLESTEPS
＃定义SINGLESTEPS 10
＃万一/ *为函数getline（类似），除得到进程pid任务的ID。
 *返回正面的，如果成功（的TID的列表号）
 *否则为0，并将errno设置。 * /
为size_t get_tids（将为pid_t **常量listptr，为size_t * const的sizeptr，常量将为pid_t PID）
{
    字符目录名称[64];
    DIR * DIR;
    将为pid_t *名单;
    为size_t大小，使用= 0;    如果（listptr || sizeptr || PID＆LT;！（将为pid_t）1）{
        错误号= EINVAL;
        回报（为size_t）0;
    }    如果（* sizeptr大于0）{
        名单= * listptr;
        大小= * sizeptr;
    }其他{
        名单= * listptr = NULL;
        大小= * sizeptr = 0;
    }    如果（的snprintf（目录名，目录名的sizeof的/ proc /％D /任务/（INT）PID）＆GT; =（int）的目录名的sizeof）{
        错误号= ENOTSUP;
        回报（为size_t）0;
    }    DIR =执行opendir（目录名）;
    如果（！DIR）{
        错误号= ESRCH;
        回报（为size_t）0;
    }    而（1）{
        结构的dirent *耳鼻喉科;
        int值;
        烧焦假;        错误号= 0;
        ENT = READDIR（DIR）;
        如果（！ENT）
            打破;        / *解析的TID。忽略非数字的条目。 * /
        如果（sscanf的（ent-＆GT; d_name，％D％C，＆安培;价值＆安培;！虚拟）= 1）
            继续;        / *忽略显然无效项。 * /
        如果（值。1）
            继续;        / *确保有空间的另一个TID。 * /
        如果（使用＆GT; =大小）{
            大小=（用来| 127）+ 128;
            表=的realloc（列表，大小* sizeof的列表[0]）;
            如果（！列表）{
                closedir（DIR）;
                错误号= ENOMEM;
                回报（为size_t）0;
            }
            * listptr =清单;
            * sizeptr =大小;
        }        /* 添加到列表。 * /
        列表[使用++] =（将为pid_t）值;
    }
    如果（错误）{
        const int的saved_errno =错误号;
        closedir（DIR）;
        错误号= saved_errno;
        回报（为size_t）0;
    }
    如果（closedir（DIR））{
        错误号= EIO;
        回报（为size_t）0;
    }    /* 没有？ * /
    如果（采用与。1）{
        错误号= ESRCH;
        回报（为size_t）0;
    }    / *确保有空间终止（将为pid_t）0。 * /
    如果（使用＆GT; =大小）{
        大小=使用+ 1;
        表=的realloc（列表，大小* sizeof的列表[0]）;
        如果（！列表）{
            错误号= ENOMEM;
            回报（为size_t）0;
        }
        * listptr =清单;
        * sizeptr =大小;
    }    / *终止名单;完成。 * /
    列表[使用] =（将为pid_t）0;
    错误号= 0;
    返回使用;
}
静态INT wait_process（常量将为pid_t PID，廉政* const的statusptr）
{
    INT状态;
    将为pid_t磷;    做{
        状态= 0;
        P = waitpid函数（PID，和放大器;状态，WUNTRACED | WCONTINUED）;
    }而（P = =（将为pid_t）-1放大器;＆放大器;错误号== EINTR）;
    如果（P！= PID）
        返回错误号= ESRCH;    如果（statusptr）
        * statusptr =状态;    返回错误号= 0;
}静态INT continue_process（常量将为pid_t PID，廉政* const的statusptr）
{
    INT状态;
    将为pid_t磷;    做{        如果（杀（PID，SIGCONT）== -1）
            返回错误号= ESRCH;        做{
            状态= 0;
            P = waitpid函数（PID，和放大器;状态，WUNTRACED | WCONTINUED）;
        }而（P = =（将为pid_t）-1放大器;＆放大器;错误号== EINTR）;        如果（P！= PID）
            返回错误号= ESRCH;    }而（WIFSTOPPED（状态））;    如果（statusptr）
        * statusptr =状态;    返回错误号= 0;
}无效show_registers（FILE * const的出来，将为pid_t TID，为const char * const的注意事项）
{
    结构user_regs_struct暂存器;
    长 -  [R;    做{
        R = ptrace函数（PTRACE_GETREGS，TID和放大器;暂存器，＆安培;暂存器）;
    }而（R == -1L＆放大器;＆放大器;错误号== ESRCH）;
    如果（R == -1L）
        返回;＃如果（定义（__ x86_64__）||定义（__ i386__））及和放大器; __WORDSIZE == 64
    如果（注意与功放;＆放大器; *注）
        fprintf中（满分，任务％D：RIP =为0x％016lx，RSP =为0x％016lx％S \\ n，（INT）TID，regs.rip，regs.rsp，注意）;
    其他
        fprintf中（满分，任务％D：RIP =为0x％016lx，RSP =为0x％016lx \\ n，（INT）TID，regs.rip，regs.rsp）;
#elif指令（定义（__ x86_64__）||定义（__ i386__））及和放大器; __WORDSIZE == 32
    如果（注意与功放;＆放大器; *注）
        fprintf中（满分，任务％D：EIP =为0x％山东省实验中学，ESP =为0x％08X％S \\ n，（INT）TID，regs.eip，regs.rsp，注意）;
    其他
        fprintf中（满分，任务％D：EIP =为0x％山东省实验中学，ESP =为0x％08X \\ n，（INT）TID，regs.eip，regs.rsp）;
＃万一
}
INT主（INT ARGC，CHAR *的argv []）
{
    将为pid_t * TID = 0;
    为size_t TIDS = 0;
    为size_t tids_max = 0;
    为size_t T，S;
    长 -  [R;    将为pid_t的孩子;
    INT状态;    如果（ARGC＆LT;！2 || STRCMP（的argv [1]，-h）|| STRCMP（的argv [1]，--help））{
        fprintf中（标准错误，\\ n）;
        fprintf中（标准错误，用法：％s的[-h | --help] \\ N的argv [0]）;
        fprintf中（标准错误，％s命令[参数...] \\ n，argv的[0]）;
        fprintf中（标准错误，\\ n）;
        fprintf中（标准错误，这个项目的一个子进程执行命令\\ n）;
        fprintf中（标准错误，并等待其停止（通过SIGSTOP信号）\\ n）;
        fprintf中（标准错误，发生这种情况时，每个线程\\ n的寄存器状态）;
        fprintf中（STDERR，转储到标准输出，那么子进程\\ n）;
        fprintf中（STDERR，会发送SIGCONT信号\\ n）;
        fprintf中（标准错误，\\ n）;
        返回1;
    }    孩子= fork（）的;
    如果（孩子==（将为pid_t）-1）{
        fprintf中（标准错误，fork（）的失败：％S \\ n，字符串错误（错误））;
        返回1;
    }    如果（！孩子）{
        使用prctl（PR_SET_DUMPABLE，（长），1）;
        使用prctl（PR_SET_PTRACER，（长）getppid（））;
        fflush（标准输出）;
        fflush（标准错误）;
        execvp（ARGV [1]，ARGV + 1）;
        fprintf中（标准错误，％s的信息：％s \\ n，ARGV [1]，字符串错误（错误））;
        返回127;
    }    fprintf中（标准错误，示踪：等待孩子（PID％D）活动\\ n \\ n，（INT）的孩子）;
    fflush（标准错误）;    而（1）{        / *等待一个孩子的事件。 * /
        如果（wait_process（儿童，和放大器;状态））
            打破;        / *已退出？ * /
        如果（WIFEXITED（状态）|| WIFSIGNALED（状态））{
            错误号= 0;
            打破;
        }        / *在这一点上，只能停事件很有趣。 * /
        如果（！WIFSTOPPED（状态））
            继续;        / *获取任务的ID。 * /
        TIDS = get_tids（安培; TID，＆安培; tids_max，子女）;
        如果（！TIDS）
            打破;        的printf（进程％d的％d个任务，（INT）的孩子，（INT）TIDS）;
        fflush（标准输出）;        / *连接到所有任务。 * /
        对于（t = 0; T＆LT; TIDS;吨++）{
            做{
                R = ptrace函数（PTRACE_ATTACH，TID [T]，（无效*）0（无效*）0）;
            }而（R == -1L＆放大器;及（错误号== || EBUSY errno的== || EFAULT错误号== ESRCH））;
            如果（R == -1L）{
                const int的saved_errno =错误号;
                而（T  - 大于0）
                    做{
                        R = ptrace函数（PTRACE_DETACH，TID [T]，（无效*）0（无效*）0）;
                    }而（R == -1L＆放大器;及（错误号== || EBUSY errno的== || EFAULT错误号== ESRCH））;
                TIDS = 0;
                错误号= saved_errno;
                打破;
            }
        }
        如果（！TIDS）{
            const int的saved_errno =错误号;
            如果（continue_process（儿童，和放大器;状态））
                打破;
            的printf（无法连接（％S）\\ n，于strerror（saved_errno））;
            fflush（标准输出）;
            如果（WIFCONTINUED（状态））
                继续;
            错误号= 0;
            打破;
        }        的printf（附所有人\\ n \\ n）;
        fflush（标准输出）;        / *转储每个任务的寄存器。 * /
        对于（t = 0; T＆LT; TIDS;吨++）
            show_registers（标准输出，TID [T]，）;
        的printf（\\ n）;
        fflush（标准输出）;        为（S = 0; S＆下; SINGLESTEPS氏++）{
            做{
                R = ptrace函数（PTRACE_SINGLESTEP，TID [TIDS-1]（无效*）0（无效*）0）;
            }而（R == -1L＆放大器;＆放大器;错误号== ESRCH）;
            如果（！r）{
                对于（t = 0; T＆LT; TIDS  -  1;吨++）
                    show_registers（标准输出，TID [T]，）;
                show_registers（标准输出，TID [TIDS-1]，一步推进。）;
                的printf（\\ n）;
                fflush（标准输出）;
            }其他{
                fprintf中（标准错误，单步失败：％S \\ n，字符串错误（错误））;
                fflush（标准错误）;
            }
        }        / *所有任务分离。 * /
        对于（t = 0; T＆LT; TIDS;吨++）
            做{
                R = ptrace函数（PTRACE_DETACH，TID [T]，（无效*）0（无效*）0）;
            }而（R == -1放大器;及（错误号== || EBUSY errno的== || EFAULT错误号== ESRCH））;
        TIDS = 0;
        如果（continue_process（儿童，和放大器;状态））
            打破;
        如果（WIFCONTINUED（状态））{
            的printf（独立式等待新的停止事件\\ n \\ n。）;
            fflush（标准输出）;
            继续;
        }
        错误号= 0;
        打破;
    }
    如果（错误）
        fprintf中（标准错误，示踪：儿童丧失（％S）\\ n字符串错误（错误））;
    其他
    如果（WIFEXITED（状态））
        fprintf中（标准错误，示踪：儿童离开（％D）\\ n，WEXITSTATUS（状态））;
    其他
    如果（WIFSIGNALED（状态））
        fprintf中（标准错误，示踪：儿童从信号％d个\\ n死了，WTERMSIG（状态））;
    其他
        fprintf中（标准错误，示踪：儿童消失\\ n）;
    fflush（标准错误）;    返回状态;
}

tracer.c 执行指定的命令，并等待该命令收到 SIGSTOP 信号。（tracer.c 自己不会发送该信号；你可以让被跟踪进程自行停止，或者从外部发送信号。）

当命令停止后，tracer.c 通过 ptrace 附加到每一个线程，并让其中一个线程单步执行固定的步数（由编译时常量 SINGLESTEPS 指定），同时显示每个线程的相关寄存器状态。

之后，它从该命令分离，并向其发送 SIGCONT 信号，让它继续正常运行。

下面是我用于测试的一个简单测试程序 worker.c：

 的#include＆LT; pthreads.h中＆GT;
＃包括LT＆;＆signal.h中GT;
＃包括LT＆;＆string.h中GT;
＃包括LT＆;＆errno.h中GT;
＃包括LT＆;＆stdio.h中GT;的#ifndef THREADS
＃定义THREADS 2
＃万一挥发性sig_atomic_t完成= 0;无效catch_done（INT正负号）
{
    DONE =正负号;
}INT INSTALL_DONE（const int的正负号）
{
    结构sigaction的行为;    sigemptyset（安培; act.sa_mask）;
    act.sa_handler = catch_done;
    act.sa_flags = 0;
    如果（的sigaction（Signum的，与放大器;行为，NULL））
        返回错误号;
    其他
        返回0;
}void *的工人（void *的数据）
{
    挥发性无符号长* const的柜台=数据;    而（！完成）
        __sync_add_and_fetch（计数器，1UL）;    回报（无效*）（无符号长）__ sync_or_and_fetch（计数器，0UL）;
}INT主要（无效）
{
    无符号长计数器= 0UL;
    线程的pthread_t [THREADS]
    pthread_attr_t ATTRS;
    为size_t我;    如果（INSTALL_DONE（SIGHUP）||
        INSTALL_DONE（SIGTERM）||
        INSTALL_DONE（SIGUSR1））{
        fprintf中（标准错误，工人：无法安装信号处理信息：％s \\ n，字符串错误（错误））;
        返回1;
    }    pthread_attr_init（安培; ATTRS）;
    pthread_attr_setstacksize（安培; ATTRS，65536）;
    对于（i = 0; I＆LT;螺纹;我++）
        如果（在pthread_create（安培;螺纹[I]，和放大器; ATTRS，工人，和放大器;计数器））{
            做= 1;
            fprintf中（标准错误，工人：无法创建线程：％S \\ n，字符串错误（错误））;
            返回1;
        }
    pthread_attr_destroy（安培; ATTRS）;    / *让原来的线程也做工人的舞蹈。 * /
    工人（安培;计数器）;    对于（i = 0; I＆LT;螺纹;我++）
        在pthread_join（线程[I]，NULL）;    返回0;
}

使用例如以下命令编译这两个程序：

gcc -W -Wall -O3 -fomit-frame-pointer worker.c -pthread -o worker
gcc -W -Wall -O3 -fomit-frame-pointer tracer.c -o tracer

然后在单独的终端中或在后台运行，例如：

./tracer ./worker &

跟踪器会显示 worker 的 PID：

Tracer: Waiting for child (pid 24275) events.

此时，子进程正常运行。当你向子进程发送 SIGSTOP 时，动作才开始。跟踪器检测到该信号后执行所需的跟踪，然后分离并让子进程继续正常运行：

 杀-STOP 24275过程24275有3个任务，附在所有。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfa6ee8。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfa6ee8。一步前进。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a63，RSP = 0x00007f399cfa6ee8。一步前进。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a65，RSP = 0x00007f399cfa6ee8。一步前进。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a58，RSP = 0x00007f399cfa6ee8。一步前进。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfa6ee8。一步前进。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a63，RSP = 0x00007f399cfa6ee8。一步前进。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a65，RSP = 0x00007f399cfa6ee8。一步前进。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a58，RSP = 0x00007f399cfa6ee8。一步前进。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfa6ee8。一步前进。任务24275：RIP = 0x0000000000400a5d，RSP = 0x00007fff6895c428。
任务24276：RIP = 0x0000000000400a5d，RSP = 0x00007f399cfb7ee8。
任务24277：RIP = 0x0000000000400a63，RSP = 0x00007f399cfa6ee8。一步前进。分离。等待新的停止事件。

您可以重复上述多次如你所愿。请注意，我选择了 SIGSTOP 信号作为触发器，因为这样一来 tracer.c 也是作为一个有用的基础生成复杂的多线程核心的每个请求转储（如多线程进程可以简单地通过将自身发送触发它 SIGSTOP ）。

在工人（）函数的线程都在上面的例子中所有纺纱的拆卸：

  0x400a50：EB 0B JMP 0x400a5d
0x400a52：66 0F 44 1F 00 00 nopw为0x0（RAX％，％RAX，1）
0x400a58：F0 48 83 07 01锁定addq $为0x1（％RDI）=第四步
0x400a5d：8B 05 00 00 00 00 MOV为0x0（％RIP），％eax中=第一步
0x400a63：85 C0的测试％EAX，EAX％=第二步
0x400a65：74 F1 JE 0x400a58 =第三步
0x400a67：48 8B 07 MOV（％RDI），RAX％
0x400a6a：48 89 C2 MOV％RAX，RDX％
0x400a6d：F0 48 0F B1 07锁定CMPXCHG％RAX，（％RDI）
0x400a72：75 F6 JNE 0x400a6a
0x400a74：48 89 D0 MOV％的RDX，RAX％
0x400a77：C3 retq

现在，这个测试程序也只显示了如何停止一个进程，连接到它的所有线程，单步执行的一个线程的指令所需数量，然后让所有的线程继续正常;它没有的没有的证明，同样适用于特定的让线程继续正常（通过 PTRACE_CONT ）。不过，我在下面说明详细说明，对我来说，同样的办法应该工作的优良 PTRACE_CONT 。

在编写上面的测试程序时，我遇到的主要问题（或者说意外）是必须使用如下的

long r;

do {
    r = ptrace(PTRACE_cmd, tid, ...);
} while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));

循环，尤其是针对 ESRCH 的情况（其他几种情况我只是根据 ptrace 手册页的描述才加上的）。

要知道，大多数 ptrace 命令只有在任务处于停止状态时才被允许。然而，当任务仍在完成例如一次单步执行命令时，它并未处于停止状态。因此，使用上述循环（也许再加上毫秒级的 nanosleep 或类似调用，以避免浪费 CPU）可以确保在我们尝试发出新命令之前，前一个 ptrace 命令已经完成（因而任务已经停止）。

Kerrek SB，我相信至少有一些你有你的测试程序的烦恼是由于这个问题？对我来说，就个人而言，这是一个类型的的 D'哦！的时刻意识到，这当然是必要的，因为ptracing本质上是异步的，不同步的。

（这不同步也是对 SIGCONT 的原因 - 我上面提到的 PTRACE_CONT 互动本人对于信使用循环妥善处理如上图所示，即交互不再是一个问题 - 实际上是可以理解的）

添加到意见这样的回答：

Linux内核使用的task_struct结构的一组任务状态标志（见<一href=\"https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/linux/sched.h\"><$c$c>include/linux/sched.h的定义）来跟踪每个任务的状态。 的ptrace的用户空间面临端（）在<定义href=\"https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/kernel/ptrace.c\"><$c$c>kernel/ptrace.c.

在 PTRACE_SINGLESTEP 或 PTRACE_CONT 被调用，<一个href=\"https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/kernel/ptrace.c\"><$c$c>kernel/ptrace.c:<$c$c>ptrace_continue()处理大多数的细节。它完成通过调用 wake_up_state（儿童，__TASK_TRACED）（<一个href=\"https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/kernel/sched/core.c\"><$c$c>kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED，0））。

当一个进程正在通过 SIGSTOP 信号停止，所有的任务都将停止，并在停止了，不跟踪的状态结束

附加到每一项工作（通过PTRACE_ATTACH或PTRACE_SEIZE，请参阅<$c$c>kernel/ptrace.c:<$c$c>ptrace_attach())修改任务的状态，但是，ptrace的状态位（见<一href=\"https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/linux/ptrace.h\"><$c$c>include/linux/ptrace.h:PT_常量的）是从任务可运行状态位独立的（见<一href=\"https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/linux/sched.h\"><$c$c>include/linux/sched.h:TASK_常量的）。

附着到任务，和发送的处理的 SIGCONT 信号之后，停止状态是不立即修改（我相信），由于任务也被跟踪。这样做PTRACE_SINGLESTEP或PTRACE_CONT在<一结束href=\"https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/kernel/sched/core.c\"><$c$c>kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED，0），该更新任务状态，任务移动到运行队列。

现在，我还没有找到code路径复杂的部分，是任务状态如何得到内核更新的任务时，下一个计划。我的测试表明，与单步执行（这是另一个任务状态标志），唯一的任务状态得到更新，同时要清除单步标志。似乎PTRACE_CONT是不可靠;我相信这是因为单步标志势力的任务状态变化。或许有一种竞争条件WRT。继续信号传递和状态变化？

（进一步编辑：内核开发人员肯定希望等待（）来调用，例如见的这个线程）

在换句话说，注意到该进程已停止后（注意，您可以使用 / proc /进程/ STAT 或 / proc /进程/状态如果这个过程是不是一个孩子，尚未连接到），我相信下面的过程是最强大的一种：

 将为pid_t PID，磷; / *处理拥有的任务* /
tid_t * TID; / *任务ID数组* /
为size_t TIDS; /* 任务 */
长期的结果;
INT状态;
为size_t我;对于（i = 0; I＆LT; TIDS，我++）{
    而（1）{
        结果= ptrace函数（PTRACE_ATTACH，TID [I]，（无效*）0（无效*）0）;
        如果（结果== -1L＆放大器;及（错误号== ESRCH ||错误号== || EBUSY errno的== || EFAULT错误号== EIO））{
            / *为了避免燃烧起来白白CPU：* /
            SCHED_YIELD（）; / *或了nanosleep（），或usleep（）函式* /
            继续;
        }
        打破;
    }
    如果（结果== -1L）{
        / *
         * 致命错误。首先从TID [0..i-1]，然后退出分离。
        * /
    }
}/ *发送SIGCONT的过程。 * /
如果（杀（PID，SIGCONT））{
    / *
     *致命错误，请参阅errno。出口。
    * /
}/ *因为我们正在连接到进程，
 *我们可以等待（）就可以了。 * /
而（1）{
    错误号= 0;
    状态= 0;
    P = waitpid函数（PID，和放大器;状态，WCONTINUED）;
    如果（第==（将为pid_t）-1）{
        如果（错误== EINTR）
            继续;
        其他
            打破;
    }其他
    如果（P！= PID）{
        错误号= ESRCH;
        打破;
    }其他
    如果（WIFCONTINUED（状态））{
        错误号= 0;
        打破;
    }
}
如果（错误）{
    / *
     * 致命错误。首先从TID [0..tids-1]，然后退出分离。
    * /
}/ *单步每个任务更新任务状态。 * /
对于（i = 0; I＆LT; TIDS，我++）{
    而（1）{
        结果= ptrace函数（PTRACE_SINGLESTEP，TID [I]，（无效*）0（无效*）0）;
        如果（结果== -1L＆放大器;＆放大器;错误号== ESRCH）{
            / *为了避免燃烧起来白白CPU：* /
            SCHED_YIELD（）; / *或了nanosleep（），或usleep（）函式* /
            继续;
        }
        打破;
    }
    如果（结果== -1L）{
        / *
         * 致命错误。首先从TID [0..i-1]，然后退出分离。
        * /
    }
}/ *获取任务寄存器的结构，以确保单步
 *已经完成了和他们的国家已经稳定下来。 * /
对于（i = 0; I＆LT; TIDS，我++）{
    结构user_regs_struct暂存器;    而（1）{
        结果= ptrace函数（PTRACE_GETREGS，TID [I]，和放大器;暂存器，＆安培;暂存器）;
        如果（结果== -1L＆放大器;及（错误号== ESRCH ||错误号== || EBUSY errno的== || EFAULT错误号== EIO））{
            / *为了避免燃烧起来白白CPU：* /
            SCHED_YIELD（）; / *或了nanosleep（），或usleep（）函式* /
            继续;
        }
        打破;
    }
    如果（结果== -1L）{
        / *
         * 致命错误。首先从TID [0..i-1]，然后退出分离。
        * /
    }
}

以上之后，所有的任务应附并在期望的状态，这样，例如PTRACE_CONT工作没有进一步的技巧。

如果在将来的内核行为的变化 - 我相信，STOP / CONT信号和ptracing之间的相互作用的东西，可能会改变;至少这个行为LKML开发商的问题将保证！ - 上面的程序仍然可以工作强劲的。（犯错误谨慎的一面，通过使用循环PTRACE_SINGLESTEP几次，也可能是一个好主意。）

要PTRACE_CONT的区别是，如果在今后的行为变化，初步PTRACE_CONT实际上可能继续处理，造成 ptrace的（）跟随它失败。随着PTRACE_SINGLESTEP，这个过程的将会的停止，进一步让 ptrace的（）调用成功。

问题？

While I was working on this question, I've come across a possible idea that uses ptrace, but I'm unable to get a proper understanding of how ptrace interacts with threads.

Suppose I have a given, multithreaded main process, and I want to attach to a specific thread in it (perhaps from a forked child).

Can I attach to a specific thread? (The manuals diverge on this question.)
If so, does that mean that single-stepping only steps through that one thread's instructions? Does it stop all the process's threads?
If so, do all the other threads remain stopped while I call PTRACE_SYSCALL or PTRACE_SINGLESTEP, or do all threads continue? Is there a way to step forward only in one single thread but guarantee that the other threads remain stopped?

Basically, I want to synchronise the original program by forcing all threads to stop, and then only execute a small set of single-threaded instructions by single-stepping the one traced thread.

My personal attempts so far look a bit like this:

/* Sketch from the question: a forked child tries to attach to the thread
 * that called fork(), so that thread can run important_instruction() under
 * the child's control. It is an illustration, not a complete program. */
pid_t target = syscall(SYS_gettid);   // get the calling thread's ID
pid_t pid = fork();

if (pid > 0)
{
    waitpid(pid, NULL, 0);            // synchronise main process

    important_instruction();
}
else if (pid == 0)
{
    // ptrace() takes the request first and the thread ID second.
    ptrace(PTRACE_ATTACH, target, NULL, NULL);    // does this work?

    // cancel parent's "waitpid" call, e.g. with a signal

    // single-step to execute "important_instruction()" above

    ptrace(PTRACE_DETACH, target, NULL, NULL);    // parent's threads resume?

    _Exit(0);
}

However, I'm not sure, and can't find suitable references, that this is concurrently-correct and that important_instruction() is guaranteed to be executed only when all other threads are stopped. I also understand that there may be race conditions when the parent receives signals from elsewhere, and I heard that I should use PTRACE_SEIZE instead, but that doesn't seem to exist everywhere.

Any clarification or references would be greatly appreciated!

解决方案

I wrote a second test case. I had to add a separate answer, since it was too long to fit into the first one with example output included.

First, here is tracer.c:

#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/user.h>
#include <dirent.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <stdio.h>
#ifndef   SINGLESTEPS
#define   SINGLESTEPS 10
#endif

/* List the task (thread) IDs of process `pid` by reading /proc/PID/task/.
 *
 * The buffer is managed getline()-style: *listptr / *sizeptr describe a
 * dynamically allocated array of *sizeptr entries (or are reset to NULL / 0
 * when *sizeptr is zero). The array is grown with realloc() as needed and
 * both are updated; the caller eventually free()s *listptr.
 *
 * On success the number of TIDs (always >= 1) is returned, the list is
 * terminated with an extra (pid_t)0 entry, and errno is 0.
 * On failure 0 is returned and errno is set:
 *   EINVAL  - NULL listptr/sizeptr, or pid < 1
 *   ENOTSUP - the directory name did not fit the local buffer
 *   ESRCH   - the task directory could not be opened, or had no tasks
 *   ENOMEM  - realloc() failed (the caller's previous buffer is untouched)
 *   EIO     - closedir() failed
 *   other   - whatever readdir() reported
 */
size_t get_tids(pid_t **const listptr, size_t *const sizeptr, const pid_t pid)
{
    char    path[64];
    DIR    *taskdir;
    pid_t  *tids;
    size_t  capacity;
    size_t  count = 0;
    int     len;

    if (listptr == NULL || sizeptr == NULL || pid < (pid_t)1) {
        errno = EINVAL;
        return (size_t)0;
    }

    /* Reuse the caller's buffer only when it claims a nonzero size. */
    if (*sizeptr > 0) {
        tids = *listptr;
        capacity = *sizeptr;
    } else {
        *listptr = NULL;
        *sizeptr = 0;
        tids = NULL;
        capacity = 0;
    }

    len = snprintf(path, sizeof path, "/proc/%d/task/", (int)pid);
    if (len >= (int)sizeof path) {
        errno = ENOTSUP;
        return (size_t)0;
    }

    taskdir = opendir(path);
    if (taskdir == NULL) {
        errno = ESRCH;
        return (size_t)0;
    }

    for (;;) {
        struct dirent *entry;
        int            number;
        char           trailing;

        /* readdir() returns NULL both at end-of-directory and on error;
         * clearing errno first lets us tell the two apart afterwards. */
        errno = 0;
        entry = readdir(taskdir);
        if (entry == NULL)
            break;

        /* Keep only names that are a plain positive decimal number:
         * exactly one conversion means no trailing characters. */
        if (sscanf(entry->d_name, "%d%c", &number, &trailing) != 1 || number < 1)
            continue;

        /* Grow the array when it is full. */
        if (count >= capacity) {
            capacity = (count | 127) + 128;
            tids = realloc(tids, capacity * sizeof tids[0]);
            if (tids == NULL) {
                closedir(taskdir);
                errno = ENOMEM;
                return (size_t)0;
            }
            *listptr = tids;
            *sizeptr = capacity;
        }

        tids[count++] = (pid_t)number;
    }

    /* A nonzero errno here means readdir() failed rather than finished. */
    if (errno != 0) {
        const int readdir_errno = errno;
        closedir(taskdir);
        errno = readdir_errno;
        return (size_t)0;
    }

    if (closedir(taskdir) != 0) {
        errno = EIO;
        return (size_t)0;
    }

    /* A process without any tasks is treated as nonexistent. */
    if (count < 1) {
        errno = ESRCH;
        return (size_t)0;
    }

    /* Reserve one extra slot for the terminating (pid_t)0. */
    if (count >= capacity) {
        capacity = count + 1;
        tids = realloc(tids, capacity * sizeof tids[0]);
        if (tids == NULL) {
            errno = ENOMEM;
            return (size_t)0;
        }
        *listptr = tids;
        *sizeptr = capacity;
    }

    tids[count] = (pid_t)0;
    errno = 0;
    return count;
}


/* Wait for a state change (exit, stop, or continue) of child `pid`.
 *
 * waitpid() is restarted whenever it is interrupted by a signal (EINTR).
 * On success the raw wait status is stored in *statusptr (if non-NULL),
 * errno is cleared and 0 is returned. If waitpid() does not report `pid`,
 * errno is set to ESRCH and ESRCH is returned.
 */
static int wait_process(const pid_t pid, int *const statusptr)
{
    int   wstatus;
    pid_t reaped;

    for (;;) {
        wstatus = 0;
        reaped = waitpid(pid, &wstatus, WUNTRACED | WCONTINUED);
        if (reaped != (pid_t)-1 || errno != EINTR)
            break;
    }

    if (reaped != pid) {
        errno = ESRCH;
        return ESRCH;
    }

    if (statusptr != NULL)
        *statusptr = wstatus;

    errno = 0;
    return 0;
}

/* Send SIGCONT to child `pid` and wait until it reports a state other
 * than "stopped".
 *
 * Each round sends SIGCONT and then waits for the next state change
 * (restarting waitpid() on EINTR). While the reported state is still
 * "stopped", the round is repeated. On success the final wait status is
 * stored in *statusptr (if non-NULL), errno is cleared and 0 is returned.
 * If kill() fails or waitpid() does not report `pid`, errno is set to
 * ESRCH and ESRCH is returned.
 */
static int continue_process(const pid_t pid, int *const statusptr)
{
    int   wstatus;
    pid_t reaped;

    for (;;) {
        if (kill(pid, SIGCONT) == -1) {
            errno = ESRCH;
            return ESRCH;
        }

        do {
            wstatus = 0;
            reaped = waitpid(pid, &wstatus, WUNTRACED | WCONTINUED);
        } while (reaped == (pid_t)-1 && errno == EINTR);

        if (reaped != pid) {
            errno = ESRCH;
            return ESRCH;
        }

        if (!WIFSTOPPED(wstatus))
            break;
    }

    if (statusptr != NULL)
        *statusptr = wstatus;

    errno = 0;
    return 0;
}

/* Print the instruction and stack pointer of traced task `tid` to `out`,
 * followed by `note` when it is non-NULL and non-empty.
 *
 * PTRACE_GETREGS is retried for as long as it fails with ESRCH. The tracer
 * relies on this to wait until a just-issued ptrace request (for example a
 * single-step) has completed and the task is stopped again. Note that this
 * loop does not terminate if the task never becomes a stopped tracee.
 * Any other ptrace failure makes the function return without output.
 *
 * Output is produced on x86-64 (including x32) and i386 only; on other
 * architectures nothing is printed.
 */
void show_registers(FILE *const out, pid_t tid, const char *const note)
{
    struct user_regs_struct regs;
    long                    r;

    do {
        r = ptrace(PTRACE_GETREGS, tid, &regs, &regs);
    } while (r == -1L && errno == ESRCH);
    if (r == -1L)
        return;

    /* Select the register names by architecture rather than by word size,
     * and cast to the exact type each conversion specifier expects. */
#if defined(__x86_64__)
    if (note && *note)
        fprintf(out, "Task %d: RIP=0x%016llx, RSP=0x%016llx. %s\n", (int)tid,
                (unsigned long long)regs.rip, (unsigned long long)regs.rsp, note);
    else
        fprintf(out, "Task %d: RIP=0x%016llx, RSP=0x%016llx.\n", (int)tid,
                (unsigned long long)regs.rip, (unsigned long long)regs.rsp);
#elif defined(__i386__)
    if (note && *note)
        fprintf(out, "Task %d: EIP=0x%08lx, ESP=0x%08lx. %s\n", (int)tid,
                (unsigned long)regs.eip, (unsigned long)regs.esp, note);
    else
        fprintf(out, "Task %d: EIP=0x%08lx, ESP=0x%08lx.\n", (int)tid,
                (unsigned long)regs.eip, (unsigned long)regs.esp);
#else
    /* No register layout known for this architecture. */
    (void)out;
    (void)note;
#endif
}


/* tracer: run COMMAND as a child process and, each time the child is
 * stopped, attach to all of its tasks, dump their registers, single-step
 * the last task in the list SINGLESTEPS times (dumping registers after each
 * step), detach from all tasks, and send SIGCONT so the child continues.
 *
 * With no arguments, or with -h / --help, usage is printed and 1 returned.
 * Otherwise the return value is the last raw wait status obtained for the
 * child (0 if none was ever obtained).
 */
int main(int argc, char *argv[])
{
    pid_t *tid = NULL;      /* Task ID list, grown by get_tids() */
    size_t tids = 0;        /* Number of tasks currently in tid[] */
    size_t tids_max = 0;    /* Allocated size of tid[] */
    size_t t, s;
    long   r;

    pid_t child;
    int   status = 0;       /* Initialized: read below even if no wait ever succeeded */

    if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, "       %s COMMAND [ ARGS ... ]\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program executes COMMAND in a child process,\n");
        fprintf(stderr, "and waits for it to stop (via a SIGSTOP signal).\n");
        fprintf(stderr, "When that occurs, the register state of each thread\n");
        fprintf(stderr, "is dumped to standard output, then the child process\n");
        fprintf(stderr, "is sent a SIGCONT signal.\n");
        fprintf(stderr, "\n");
        return 1;
    }

    child = fork();
    if (child == (pid_t)-1) {
        fprintf(stderr, "fork() failed: %s.\n", strerror(errno));
        return 1;
    }

    if (!child) {
        /* Child: mark ourselves dumpable and name the parent as the
         * permitted tracer, then replace ourselves with COMMAND. */
        prctl(PR_SET_DUMPABLE, (long)1);
        prctl(PR_SET_PTRACER, (long)getppid());
        fflush(stdout);
        fflush(stderr);
        execvp(argv[1], argv + 1);
        /* Only reached if execvp() failed. */
        fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
        return 127;
    }

    fprintf(stderr, "Tracer: Waiting for child (pid %d) events.\n\n", (int)child);
    fflush(stderr);

    while (1) {

        /* Wait for a child event. */
        if (wait_process(child, &status))
            break;

        /* Exited? */
        if (WIFEXITED(status) || WIFSIGNALED(status)) {
            errno = 0;
            break;
        }

        /* At this point, only stopped events are interesting. */
        if (!WIFSTOPPED(status))
            continue;

        /* Obtain task IDs. */
        tids = get_tids(&tid, &tids_max, child);
        if (!tids)
            break;

        printf("Process %d has %d tasks,", (int)child, (int)tids);
        fflush(stdout);

        /* Attach to all tasks. EBUSY, EFAULT and ESRCH are retried. */
        for (t = 0; t < tids; t++) {
            do {
                r = ptrace(PTRACE_ATTACH, tid[t], (void *)0, (void *)0);
            } while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
            if (r == -1L) {
                /* Attach failed: detach from the tasks attached so far,
                 * and mark the list empty so the failure path below runs. */
                const int saved_errno = errno;
                while (t-->0)
                    do {
                        r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0);
                    } while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
                tids = 0;
                errno = saved_errno;
                break;
            }
        }
        if (!tids) {
            /* Could not attach: let the child run again and report why. */
            const int saved_errno = errno;
            if (continue_process(child, &status))
                break;
            printf(" failed to attach (%s).\n", strerror(saved_errno));
            fflush(stdout);
            if (WIFCONTINUED(status))
                continue;
            errno = 0;
            break;
        }

        printf(" attached to all.\n\n");
        fflush(stdout);

        /* Dump the registers of each task. */
        for (t = 0; t < tids; t++)
            show_registers(stdout, tid[t], "");
        printf("\n");
        fflush(stdout);

        /* Single-step the last task in the list; the others stay attached
         * and are only inspected. ESRCH is retried until the previous
         * request on that task has completed. */
        for (s = 0; s < SINGLESTEPS; s++) {
            do {
                r = ptrace(PTRACE_SINGLESTEP, tid[tids-1], (void *)0, (void *)0);
            } while (r == -1L && errno == ESRCH);
            if (!r) {
                for (t = 0; t < tids - 1; t++)
                    show_registers(stdout, tid[t], "");
                show_registers(stdout, tid[tids-1], "Advanced by one step.");
                printf("\n");
                fflush(stdout);
            } else {
                fprintf(stderr, "Single-step failed: %s.\n", strerror(errno));
                fflush(stderr);
            }
        }

        /* Detach from all tasks. */
        for (t = 0; t < tids; t++)
            do {
                r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0);
            } while (r == -1 && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
        tids = 0;

        /* Resume the child; keep looping only if it actually continued. */
        if (continue_process(child, &status))
            break;
        if (WIFCONTINUED(status)) {
            printf("Detached. Waiting for new stop events.\n\n");
            fflush(stdout);
            continue;
        }
        errno = 0;
        break;
    }

    /* Report why the loop ended. */
    if (errno)
        fprintf(stderr, "Tracer: Child lost (%s)\n", strerror(errno));
    else
    if (WIFEXITED(status))
        fprintf(stderr, "Tracer: Child exited (%d)\n", WEXITSTATUS(status));
    else
    if (WIFSIGNALED(status))
        fprintf(stderr, "Tracer: Child died from signal %d\n", WTERMSIG(status));
    else
        fprintf(stderr, "Tracer: Child vanished\n");
    fflush(stderr);

    free(tid);

    return status;
}

tracer.c executes the specified command, waiting for the command to receive a SIGSTOP signal. (tracer.c does not send it itself; you can either have the tracee stop itself, or send the signal externally.)

When the command has stopped, tracer.c attaches a ptrace to every thread, and single-steps one of the threads a fixed number of steps (SINGLESTEPS compile-time constant), showing the pertinent register state for each thread.

After that, it detaches from the command, and sends it a SIGCONT signal to let it continue its operation normally.

Here is a simple test program, worker.c, I used for testing:

#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>

#ifndef   THREADS
#define   THREADS  2
#endif

/* Termination flag: zero while the workers should keep running, otherwise
 * the number of the signal that requested shutdown. It is written from a
 * signal handler, hence volatile sig_atomic_t. */
volatile sig_atomic_t   done = 0;

/* Signal handler: record which signal asked the workers to finish. */
void catch_done(int signum)
{
    done = signum;
}

int install_done(const int signum)
{
    struct sigaction act;

    sigemptyset(&act.sa_mask);
    act.sa_handler = catch_done;
    act.sa_flags = 0;
    if (sigaction(signum, &act, NULL))
        return errno;
    else
        return 0;
}

/* Thread body: atomically increment the shared counter that `data` points
 * to until the `done` flag becomes nonzero.
 * Returns the counter value, read atomically, cast to a pointer.
 */
void *worker(void *data)
{
    volatile unsigned long *const counter = data;
    unsigned long                 final_count;

    for (;;) {
        if (done)
            break;
        __sync_add_and_fetch(counter, 1UL);
    }

    /* OR-ing with zero is used as an atomic read of the counter. */
    final_count = __sync_or_and_fetch(counter, 0UL);
    return (void *)final_count;
}

/* Test worker entry point.
 *
 * Installs catch_done() for SIGHUP, SIGTERM and SIGUSR1, starts THREADS
 * additional threads running worker() on one shared counter, runs
 * worker() in the original thread as well, and finally joins the
 * created threads.
 *
 * Returns 0 on normal completion, 1 if the signal handlers cannot be
 * installed or a thread cannot be created. */
int main(void)
{
    unsigned long   counter = 0UL;
    pthread_t       thread[THREADS];
    pthread_attr_t  attrs;
    size_t          i;
    int             result;

    if (install_done(SIGHUP) ||
        install_done(SIGTERM) ||
        install_done(SIGUSR1)) {
        fprintf(stderr, "Worker: Cannot install signal handlers: %s.\n", strerror(errno));
        return 1;
    }

    pthread_attr_init(&attrs);
    pthread_attr_setstacksize(&attrs, 65536);
    for (i = 0; i < THREADS; i++) {
        /* pthread_create() reports failure through its return value and
         * does not set errno, so the message must be built from that
         * return value rather than from errno. */
        result = pthread_create(&thread[i], &attrs, worker, &counter);
        if (result) {
            /* Tell the threads already started to leave their loops. */
            done = 1;
            fprintf(stderr, "Worker: Cannot create thread: %s.\n", strerror(result));
            return 1;
        }
    }
    pthread_attr_destroy(&attrs);

    /* Let the original thread also do the worker dance. */
    worker(&counter);

    for (i = 0; i < THREADS; i++)
        pthread_join(thread[i], NULL);

    return 0;
}

Compile both using e.g.

gcc -W -Wall -O3 -fomit-frame-pointer worker.c -pthread -o worker
gcc -W -Wall -O3 -fomit-frame-pointer tracer.c -o tracer

and run the tracer either in a separate terminal or in the background, using e.g.

./tracer ./worker &

The tracer shows the PID of the worker:

Tracer: Waiting for child (pid 24275) events.

At this point, the child is running normally. The action starts when you send a SIGSTOP to the child. The tracer detects it, does the desired tracing, then detaches and lets the child continue normally:

kill -STOP 24275

Process 24275 has 3 tasks, attached to all.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a65, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a58, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a65, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a58, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.

Detached. Waiting for new stop events.

You can repeat the above as many times as you wish. Note that I picked the SIGSTOP signal as the trigger, because this way tracer.c is also useful as a basis for generating complex multithreaded core dumps per request (as the multithreaded process can simply trigger it by sending itself a SIGSTOP).

Here is the disassembly of the worker() function, in which all the threads are spinning in the above example:

0x400a50: eb 0b                 jmp          0x400a5d
0x400a52: 66 0f 1f 44 00 00     nopw         0x0(%rax,%rax,1)
0x400a58: f0 48 83 07 01        lock addq    $0x1,(%rdi)          = fourth step
0x400a5d: 8b 05 00 00 00 00     mov          0x0(%rip),%eax       = first step
0x400a63: 85 c0                 test         %eax,%eax            = second step
0x400a65: 74 f1                 je           0x400a58             = third step
0x400a67: 48 8b 07              mov          (%rdi),%rax
0x400a6a: 48 89 c2              mov          %rax,%rdx
0x400a6d: f0 48 0f b1 07        lock cmpxchg %rax,(%rdi)
0x400a72: 75 f6                 jne          0x400a6a
0x400a74: 48 89 d0              mov          %rdx,%rax
0x400a77: c3                    retq

Now, this test program only shows how to stop a process, attach to all of its threads, single-step one of the threads a desired number of instructions, and then let all the threads continue normally; it does not yet prove that the same applies for letting specific threads continue normally (via PTRACE_CONT). However, the detail I describe below indicates, to me, that the same approach should work fine for PTRACE_CONT.

The main problem or surprise I encountered while writing the above test programs was the necessity of the

long r;

/* Repeat the ptrace request for as long as it fails with EBUSY, EFAULT
 * or ESRCH; any other outcome ends the loop.
 * PTRACE_cmd and "..." are placeholders for the actual request and its
 * remaining arguments. */
do {
    r = ptrace(PTRACE_cmd, tid, ...);
} while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));

loop, especially for the ESRCH case (the others I only added due to the ptrace man page description).

You see, most ptrace commands are only allowed when the task is stopped. However, the task is not stopped when it is still completing e.g. a single-step command. Thus, using the above loop -- perhaps adding a millisecond nanosleep or similar to avoid wasting CPU -- makes sure the previous ptrace command has completed (and thus the task stopped) before we try to supply the new one.

Kerrek SB, I do believe at least some of the troubles you've had with your test programs are due to this issue? To me, personally, it was a kind of a D'oh! moment to realize that of course this is necessary, as ptracing is inherently asynchronous, not synchronous.

(This asynchronicity is also the cause for the SIGCONT-PTRACE_CONT interaction I mentioned above. I do believe with proper handling using the loop shown above, that interaction is no longer a problem -- and is actually quite understandable.)

Adding to the comments to this answer:

The Linux kernel uses a set of task state flags in the task_struct structure (see include/linux/sched.h for definition) to keep track of the state of each task. The userspace-facing side of ptrace() is defined in kernel/ptrace.c.

When PTRACE_SINGLESTEP or PTRACE_CONT is called, kernel/ptrace.c:ptrace_continue() handles most of the details. It finishes by calling wake_up_state(child, __TASK_TRACED) (kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0)).

When a process is stopped via SIGSTOP signal, all tasks will be stopped, and end up in the "stopped, not traced" state.

Attaching to every task (via PTRACE_ATTACH or PTRACE_SEIZE, see kernel/ptrace.c:ptrace_attach()) modifies the task state. However, ptrace state bits (see include/linux/ptrace.h:PT_ constants) are separate from the task runnable state bits (see include/linux/sched.h:TASK_ constants).

After attaching to the tasks, and sending the process a SIGCONT signal, the stopped state is not immediately modified (I believe), since the task is also being traced. Doing PTRACE_SINGLESTEP or PTRACE_CONT ends up in kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0), which updates the task state, and moves the task to the run queue.

Now, the complicated part, for which I haven't yet found the code path, is how the task state gets updated in the kernel when the task is next scheduled. My tests indicate that with single-stepping (which is yet another task state flag), only the task state gets updated, with the single-step flag cleared. It seems that PTRACE_CONT is not as reliable; I believe it is because the single-step flag "forces" that task state change. Perhaps there is a "race condition" wrt. the continue signal delivery and state change?

(Further edit: the kernel developers definitely expect wait() to be called, see for example this thread.)

In other words, after noticing that the process has stopped (note that you can use /proc/PID/stat or /proc/PID/status if the process is not a child, and not yet attached to), I believe the following procedure is the most robust one:

pid_t  pid, p; /* Process owning the tasks; p receives the waitpid() result */
pid_t *tid;    /* Task ID array (Linux task/thread IDs are of type pid_t) */
size_t tids;   /* Number of tasks in tid[] */
long   result; /* Return value of the most recent ptrace() call */
int    status; /* Wait status filled in by waitpid() */
size_t i;      /* Index into tid[] */

/* Attach to every task. A request that fails with ESRCH, EBUSY, EFAULT
 * or EIO is repeated; any other failure ends the retry loop. */
for (i = 0; i < tids; i++) {
    for (;;) {
        result = ptrace(PTRACE_ATTACH, tid[i], (void *)0, (void *)0);
        if (result != -1L)
            break;
        if (errno != ESRCH && errno != EBUSY && errno != EFAULT && errno != EIO)
            break;
        /* Yield the CPU instead of spinning; nanosleep() or usleep()
         * would do as well. */
        sched_yield();
    }
    if (result == -1L) {
        /*
         * Fatal error. First detach from tid[0..i-1], then exit.
         */
    }
}

/* Send SIGCONT to the process. */
if (kill(pid, SIGCONT)) {
    /*
     * Fatal error, see errno. Exit.
    */
}

/* Since we are attached to the process,
 * we can wait() on it. */
while (1) {
    errno = 0;
    status = 0;
    p = waitpid(pid, &status, WCONTINUED);
    if (p == (pid_t)-1) {
        if (errno == EINTR)
            continue;
        else
            break;
    } else
    if (p != pid) {
        errno = ESRCH;
        break;
    } else
    if (WIFCONTINUED(status)) {
        errno = 0;
        break;
    }
}
if (errno) {
    /*
     * Fatal error. First detach from tid[0..tids-1], then exit.
    */
}

/* Single-step each task once to update the task states.
 * A request that fails with ESRCH is repeated. */
for (i = 0; i < tids; i++) {
    for (;;) {
        result = ptrace(PTRACE_SINGLESTEP, tid[i], (void *)0, (void *)0);
        if (result != -1L || errno != ESRCH)
            break;
        /* Yield the CPU instead of spinning; nanosleep() or usleep()
         * would do as well. */
        sched_yield();
    }
    if (result == -1L) {
        /*
         * Fatal error. All tasks are attached at this point, so first
         * detach from tid[0..tids-1], then exit.
         */
    }
}

/* Fetch the register structure of every task, to make sure the
 * single-steps have completed and the task states have stabilized.
 * A request that fails with ESRCH, EBUSY, EFAULT or EIO is repeated. */
for (i = 0; i < tids; i++) {
    struct user_regs_struct regs;

    for (;;) {
        result = ptrace(PTRACE_GETREGS, tid[i], &regs, &regs);
        if (result != -1L)
            break;
        if (errno != ESRCH && errno != EBUSY && errno != EFAULT && errno != EIO)
            break;
        /* Yield the CPU instead of spinning; nanosleep() or usleep()
         * would do as well. */
        sched_yield();
    }
    if (result == -1L) {
        /*
         * Fatal error. All tasks are attached at this point, so first
         * detach from tid[0..tids-1], then exit.
         */
    }
}

After the above, all tasks should be attached and in the expected state, so that e.g. PTRACE_CONT works without further tricks.

If the behaviour changes in future kernels -- I do believe the interaction between the STOP/CONT signals and ptracing is something that might change; at least a question to the LKML developers about this behaviour would be warranted! --, the above procedure will still work robustly. (Erring on the side of caution, by using a loop to PTRACE_SINGLESTEP a few times, might also be a good idea.)

The difference from PTRACE_CONT is that if the behaviour changes in the future, an initial PTRACE_CONT might actually continue the process, causing the ptrace() calls that follow it to fail. With PTRACE_SINGLESTEP, the process will stop, allowing further ptrace() calls to succeed.

Questions?

这篇关于如何使用ptrace函数来获得多线程的一致看法？的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

如何使用ptrace函数来获得多线程的一致看法？ [英] How to use PTRACE to get a consistent view of multiple threads?

问题描述

相关文章

服务器开发最新文章

热门教程

热门工具

登录关闭

如何使用ptrace函数来获得多线程的一致看法？ [英] How to use PTRACE to get a consistent view of multiple threads?

问题描述

相关文章

服务器开发最新文章

热门教程

热门工具

登录 关闭

登录关闭