Linux：调度进程和线程

Question 1

我测试了你的观察结果，至少在最近的内核上它是错误的。我写了这个代码。

#define _GNU_SOURCE
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <err.h>

#include <pthread.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/resource.h>

#define TIMEOUT 4

/*
 * Print the calling thread's accumulated CPU time (user + system) and its
 * total number of context switches (voluntary + involuntary) to stdout.
 *
 * type: label printed at the start of the line (e.g. "Thread" or "Process").
 *
 * If getrusage() fails, a warning is written to stderr and nothing is
 * printed to stdout.
 */
void print_usage(
    char *type)
{
  struct rusage use;

  /* Without this check a failure would print uninitialized stack data. */
  if (getrusage(RUSAGE_THREAD, &use) != 0) {
    warn("getrusage failed");
    return;
  }

  float total_time = 0;
  long total_sw = 0;
  total_time += use.ru_utime.tv_sec + ((float)use.ru_utime.tv_usec / 1000000);
  total_time += use.ru_stime.tv_sec + ((float)use.ru_stime.tv_usec / 1000000);

  total_sw = use.ru_nvcsw + use.ru_nivcsw;

  /* total_sw is a long, so the conversion must be %ld; %d is a
   * format/argument mismatch (undefined behaviour). */
  printf("Type: %s, CPU Time: %.3f seconds, Total context switches: %ld\n",
         type, total_time, total_sw);
}

/* Synchronization state handed to every worker: a single spinlock that the
 * workers (spinner_thread / spinner_process) lock and unlock. */
struct worksync {
  pthread_spinlock_t spin;  /* lock the workers contend on */
};

/*
 * Thread entry point: acquire the spinlock in the shared worksync, report
 * this thread's resource usage while holding it, release it, and terminate
 * the thread.
 *
 * data: pointer to the struct worksync passed to pthread_create().
 */
void * spinner_thread(
    void *data)
{
  struct worksync *shared = data;
  pthread_spinlock_t *lock = &shared->spin;

  pthread_spin_lock(lock);
  print_usage("Thread");
  pthread_spin_unlock(lock);

  pthread_exit(NULL);
}

/*
 * Fork a child process that runs `ncpu` spinner threads.
 *
 * The child takes the spinlock, starts the threads (which then busy-wait on
 * the lock), sleeps for `timeout` seconds, releases the lock so each thread
 * can report its usage, joins the threads and exits. The parent returns
 * right after fork() without waiting for the child.
 *
 * ncpu:    number of threads to create.
 * timeout: seconds to hold the lock while the threads spin.
 *
 * If a thread cannot be created, a warning is printed, the lock is released
 * without sleeping, the threads that did start are joined, and the child
 * exits with EXIT_FAILURE.
 */
void spawn_threaded_worker(
    int ncpu,
    int timeout)
{
  pid_t pid;

  pid = fork();
  if (pid < 0)
    err(EXIT_FAILURE, "fork failed");
  if (pid == 0) {

    /* allocate and initialize structures */
    pthread_t *threads = alloca(sizeof(pthread_t) * ncpu);
    struct worksync sync;
    int nstarted = 0;
    int i;

    pthread_spin_init(&sync.spin, PTHREAD_PROCESS_PRIVATE);

    /* Take the lock BEFORE creating any thread. Otherwise a freshly created
     * thread can win the lock, report and exit without ever spinning. */
    pthread_spin_lock(&sync.spin);

    for (i = 0; i < ncpu; i++) {
      int rc = pthread_create(&threads[i], NULL, spinner_thread, (void *)&sync);
      if (rc != 0) {
        /* pthread_create returns the error number instead of setting errno. */
        warnx("pthread_create failed: %s", strerror(rc));
        break;
      }
      nstarted++;
    }

    if (nstarted == ncpu)
      sleep(timeout);
    pthread_spin_unlock(&sync.spin);

    /* Only join threads that were actually created. */
    for (i = 0; i < nstarted; i++)
      pthread_join(threads[i], NULL);

    exit(nstarted == ncpu ? 0 : EXIT_FAILURE);
  }
}

/*
 * Body of a forked worker process: acquire the spinlock in `sync`, report
 * this process's resource usage while holding it, release it, and exit.
 *
 * sync: the struct worksync whose spinlock is contended.
 *
 * Does not return; the process ends with exit(0).
 */
void spinner_process(
    struct worksync *sync)
{
  pthread_spinlock_t *lock = &sync->spin;

  pthread_spin_lock(lock);
  print_usage("Process");
  pthread_spin_unlock(lock);

  exit(0);
}

/*
 * Fork a child process that in turn forks `ncpu` spinner processes.
 *
 * The child maps a shared anonymous page holding a process-shared spinlock,
 * takes the lock, forks the workers (each runs spinner_process and busy-waits
 * on the lock), sleeps for `timeout` seconds, releases the lock so each
 * worker can report its usage, reaps the workers and exits. The parent
 * returns right after fork() without waiting for the child.
 *
 * ncpu:    number of worker processes to fork.
 * timeout: seconds to hold the lock while the workers spin.
 *
 * If a worker cannot be forked, a warning is printed, the lock is released
 * without sleeping, the workers that did start are reaped, and the child
 * exits with EXIT_FAILURE.
 */
void spawn_forked_worker(
    int ncpu,
    int timeout)
{
  int i;
  int status;
  pid_t pid;
  pid = fork();
  if (pid < 0)
    err(EXIT_FAILURE, "fork failed");

  if (pid == 0) {
    int nstarted = 0;
    struct worksync *sync = mmap(NULL, sizeof(struct worksync),
                           PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
    /* A real check rather than assert(): it must survive -DNDEBUG builds. */
    if (sync == MAP_FAILED)
      err(EXIT_FAILURE, "mmap failed");
    pthread_spin_init(&sync->spin, PTHREAD_PROCESS_SHARED);
    pthread_spin_lock(&sync->spin);

    for (i = 0; i < ncpu; i++) {
      pid_t child = fork();
      if (child < 0) {
        /* Do not abort() here: that would kill the lock holder and leave the
         * workers already forked spinning on the lock forever. */
        warn("fork failed");
        break;
      }

      if (child == 0)
        spinner_process(sync);  /* does not return */

      nstarted++;
    }

    if (nstarted == ncpu)
      sleep(timeout);
    pthread_spin_unlock(&sync->spin);

    /* Reap only the workers that were actually started. */
    for (i = 0; i < nstarted; i++)
      wait(&status);
    exit(nstarted == ncpu ? 0 : EXIT_FAILURE);
  }
}


/*
 * Entry point: print a banner, start the threaded worker and the forked
 * worker with one spinner per online CPU for TIMEOUT seconds each, then
 * reap both worker processes.
 */
int main(
    void)
{
  int ncpu;
  int status;
  ncpu = sysconf(_SC_NPROCESSORS_ONLN);
  assert(ncpu > 0);

  printf("Running %d threads and %d processes for %d seconds\n", ncpu, ncpu, TIMEOUT);
  /* Flush before forking. When stdout is a pipe or file it is fully
   * buffered, so every descendant that calls exit() would otherwise
   * write its inherited copy of the banner again. */
  fflush(stdout);

  spawn_threaded_worker(ncpu, TIMEOUT);
  spawn_forked_worker(ncpu, TIMEOUT);

  /* One wait() per spawned worker process. */
  wait(&status);
  wait(&status);

  exit(0);
}

它测量在线程模型和分叉模型中执行 CPU 密集型工作（在自旋锁中旋转）所花费的 CPU 时间，同时使用所有系统 CPU。然后报告 CPU 统计信息。

我在一台 4 CPU 的机器上得到的结果如下：

自动分组已禁用

$ ./schedtest 
Running 4 threads and 4 processes for 4 seconds
Type: Thread, CPU Time: 1.754 seconds, Total context switches: 213
Type: Thread, CPU Time: 1.758 seconds, Total context switches: 208
Type: Thread, CPU Time: 1.755 seconds, Total context switches: 217
Type: Process, CPU Time: 1.768 seconds, Total context switches: 251
Type: Process, CPU Time: 1.759 seconds, Total context switches: 209
Type: Thread, CPU Time: 1.772 seconds, Total context switches: 258
Type: Process, CPU Time: 1.752 seconds, Total context switches: 215
Type: Process, CPU Time: 1.756 seconds, Total context switches: 225

启用自动分组功能

$ ./schedtest 
Running 4 threads and 4 processes for 4 seconds
Type: Thread, CPU Time: 0.495 seconds, Total context switches: 167
Type: Thread, CPU Time: 0.496 seconds, Total context switches: 167
Type: Thread, CPU Time: 0.430 seconds, Total context switches: 145
Type: Process, CPU Time: 0.430 seconds, Total context switches: 148
Type: Process, CPU Time: 0.440 seconds, Total context switches: 149
Type: Process, CPU Time: 0.440 seconds, Total context switches: 150
Type: Thread, CPU Time: 0.457 seconds, Total context switches: 153
Type: Process, CPU Time: 0.430 seconds, Total context switches: 144

您可以清楚地看到，内核没有区分线程和进程。

我不知道你在做什么，但无论它是什么，它都不符合 Linux 的工作方式，至少对我来说是这样。

Answer

我测试了你的观察结果，至少在最近的内核上它是错误的。我写了这个代码。

#define _GNU_SOURCE
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <err.h>

#include <pthread.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/resource.h>

#define TIMEOUT 4

/*
 * Print the calling thread's accumulated CPU time (user + system) and its
 * total number of context switches (voluntary + involuntary) to stdout.
 *
 * type: label printed at the start of the line (e.g. "Thread" or "Process").
 *
 * If getrusage() fails, a warning is written to stderr and nothing is
 * printed to stdout.
 */
void print_usage(
    char *type)
{
  struct rusage use;

  /* Without this check a failure would print uninitialized stack data. */
  if (getrusage(RUSAGE_THREAD, &use) != 0) {
    warn("getrusage failed");
    return;
  }

  float total_time = 0;
  long total_sw = 0;
  total_time += use.ru_utime.tv_sec + ((float)use.ru_utime.tv_usec / 1000000);
  total_time += use.ru_stime.tv_sec + ((float)use.ru_stime.tv_usec / 1000000);

  total_sw = use.ru_nvcsw + use.ru_nivcsw;

  /* total_sw is a long, so the conversion must be %ld; %d is a
   * format/argument mismatch (undefined behaviour). */
  printf("Type: %s, CPU Time: %.3f seconds, Total context switches: %ld\n",
         type, total_time, total_sw);
}

/* Synchronization state handed to every worker: a single spinlock that the
 * workers (spinner_thread / spinner_process) lock and unlock. */
struct worksync {
  pthread_spinlock_t spin;  /* lock the workers contend on */
};

/*
 * Thread entry point: acquire the spinlock in the shared worksync, report
 * this thread's resource usage while holding it, release it, and terminate
 * the thread.
 *
 * data: pointer to the struct worksync passed to pthread_create().
 */
void * spinner_thread(
    void *data)
{
  struct worksync *shared = data;
  pthread_spinlock_t *lock = &shared->spin;

  pthread_spin_lock(lock);
  print_usage("Thread");
  pthread_spin_unlock(lock);

  pthread_exit(NULL);
}

/*
 * Fork a child process that runs `ncpu` spinner threads.
 *
 * The child takes the spinlock, starts the threads (which then busy-wait on
 * the lock), sleeps for `timeout` seconds, releases the lock so each thread
 * can report its usage, joins the threads and exits. The parent returns
 * right after fork() without waiting for the child.
 *
 * ncpu:    number of threads to create.
 * timeout: seconds to hold the lock while the threads spin.
 *
 * If a thread cannot be created, a warning is printed, the lock is released
 * without sleeping, the threads that did start are joined, and the child
 * exits with EXIT_FAILURE.
 */
void spawn_threaded_worker(
    int ncpu,
    int timeout)
{
  pid_t pid;

  pid = fork();
  if (pid < 0)
    err(EXIT_FAILURE, "fork failed");
  if (pid == 0) {

    /* allocate and initialize structures */
    pthread_t *threads = alloca(sizeof(pthread_t) * ncpu);
    struct worksync sync;
    int nstarted = 0;
    int i;

    pthread_spin_init(&sync.spin, PTHREAD_PROCESS_PRIVATE);

    /* Take the lock BEFORE creating any thread. Otherwise a freshly created
     * thread can win the lock, report and exit without ever spinning. */
    pthread_spin_lock(&sync.spin);

    for (i = 0; i < ncpu; i++) {
      int rc = pthread_create(&threads[i], NULL, spinner_thread, (void *)&sync);
      if (rc != 0) {
        /* pthread_create returns the error number instead of setting errno. */
        warnx("pthread_create failed: %s", strerror(rc));
        break;
      }
      nstarted++;
    }

    if (nstarted == ncpu)
      sleep(timeout);
    pthread_spin_unlock(&sync.spin);

    /* Only join threads that were actually created. */
    for (i = 0; i < nstarted; i++)
      pthread_join(threads[i], NULL);

    exit(nstarted == ncpu ? 0 : EXIT_FAILURE);
  }
}

/*
 * Body of a forked worker process: acquire the spinlock in `sync`, report
 * this process's resource usage while holding it, release it, and exit.
 *
 * sync: the struct worksync whose spinlock is contended.
 *
 * Does not return; the process ends with exit(0).
 */
void spinner_process(
    struct worksync *sync)
{
  pthread_spinlock_t *lock = &sync->spin;

  pthread_spin_lock(lock);
  print_usage("Process");
  pthread_spin_unlock(lock);

  exit(0);
}

/*
 * Fork a child process that in turn forks `ncpu` spinner processes.
 *
 * The child maps a shared anonymous page holding a process-shared spinlock,
 * takes the lock, forks the workers (each runs spinner_process and busy-waits
 * on the lock), sleeps for `timeout` seconds, releases the lock so each
 * worker can report its usage, reaps the workers and exits. The parent
 * returns right after fork() without waiting for the child.
 *
 * ncpu:    number of worker processes to fork.
 * timeout: seconds to hold the lock while the workers spin.
 *
 * If a worker cannot be forked, a warning is printed, the lock is released
 * without sleeping, the workers that did start are reaped, and the child
 * exits with EXIT_FAILURE.
 */
void spawn_forked_worker(
    int ncpu,
    int timeout)
{
  int i;
  int status;
  pid_t pid;
  pid = fork();
  if (pid < 0)
    err(EXIT_FAILURE, "fork failed");

  if (pid == 0) {
    int nstarted = 0;
    struct worksync *sync = mmap(NULL, sizeof(struct worksync),
                           PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
    /* A real check rather than assert(): it must survive -DNDEBUG builds. */
    if (sync == MAP_FAILED)
      err(EXIT_FAILURE, "mmap failed");
    pthread_spin_init(&sync->spin, PTHREAD_PROCESS_SHARED);
    pthread_spin_lock(&sync->spin);

    for (i = 0; i < ncpu; i++) {
      pid_t child = fork();
      if (child < 0) {
        /* Do not abort() here: that would kill the lock holder and leave the
         * workers already forked spinning on the lock forever. */
        warn("fork failed");
        break;
      }

      if (child == 0)
        spinner_process(sync);  /* does not return */

      nstarted++;
    }

    if (nstarted == ncpu)
      sleep(timeout);
    pthread_spin_unlock(&sync->spin);

    /* Reap only the workers that were actually started. */
    for (i = 0; i < nstarted; i++)
      wait(&status);
    exit(nstarted == ncpu ? 0 : EXIT_FAILURE);
  }
}


/*
 * Entry point: print a banner, start the threaded worker and the forked
 * worker with one spinner per online CPU for TIMEOUT seconds each, then
 * reap both worker processes.
 */
int main(
    void)
{
  int ncpu;
  int status;
  ncpu = sysconf(_SC_NPROCESSORS_ONLN);
  assert(ncpu > 0);

  printf("Running %d threads and %d processes for %d seconds\n", ncpu, ncpu, TIMEOUT);
  /* Flush before forking. When stdout is a pipe or file it is fully
   * buffered, so every descendant that calls exit() would otherwise
   * write its inherited copy of the banner again. */
  fflush(stdout);

  spawn_threaded_worker(ncpu, TIMEOUT);
  spawn_forked_worker(ncpu, TIMEOUT);

  /* One wait() per spawned worker process. */
  wait(&status);
  wait(&status);

  exit(0);
}

它测量在线程模型和分叉模型中执行 CPU 密集型工作（在自旋锁中旋转）所花费的 CPU 时间，同时使用所有系统 CPU。然后报告 CPU 统计信息。

我在一台 4 CPU 的机器上得到的结果如下：

自动分组已禁用

$ ./schedtest 
Running 4 threads and 4 processes for 4 seconds
Type: Thread, CPU Time: 1.754 seconds, Total context switches: 213
Type: Thread, CPU Time: 1.758 seconds, Total context switches: 208
Type: Thread, CPU Time: 1.755 seconds, Total context switches: 217
Type: Process, CPU Time: 1.768 seconds, Total context switches: 251
Type: Process, CPU Time: 1.759 seconds, Total context switches: 209
Type: Thread, CPU Time: 1.772 seconds, Total context switches: 258
Type: Process, CPU Time: 1.752 seconds, Total context switches: 215
Type: Process, CPU Time: 1.756 seconds, Total context switches: 225

启用自动分组功能

$ ./schedtest 
Running 4 threads and 4 processes for 4 seconds
Type: Thread, CPU Time: 0.495 seconds, Total context switches: 167
Type: Thread, CPU Time: 0.496 seconds, Total context switches: 167
Type: Thread, CPU Time: 0.430 seconds, Total context switches: 145
Type: Process, CPU Time: 0.430 seconds, Total context switches: 148
Type: Process, CPU Time: 0.440 seconds, Total context switches: 149
Type: Process, CPU Time: 0.440 seconds, Total context switches: 150
Type: Thread, CPU Time: 0.457 seconds, Total context switches: 153
Type: Process, CPU Time: 0.430 seconds, Total context switches: 144

您可以清楚地看到，内核没有区分线程和进程。

我不知道你在做什么，但无论它是什么，它都不符合 Linux 的工作方式，至少对我来说是这样。

Question 2

我认为你所看到的是 CFS（完全公平调度器，Completely Fair Scheduler）的“自动分组”（autogroup）功能，它会尝试把共享同一“会话”（例如通过调用 setsid() 启动的会话）的进程（和线程）归为一组。

（我在这里做出的假设是，这 48 个单线程进程是各自在独立的会话中启动的。）

您可以尝试使用此命令禁用“自动分组”功能，看看它是否会改变您所看到的行为：

echo 0 >/proc/sys/kernel/sched_autogroup_enabled

更多细节请查看 sched(7) 手册页中有关 autogroup 的部分。

Answer

我认为你所看到的是 CFS（完全公平调度器，Completely Fair Scheduler）的“自动分组”（autogroup）功能，它会尝试把共享同一“会话”（例如通过调用 setsid() 启动的会话）的进程（和线程）归为一组。

（我在这里做出的假设是，这 48 个单线程进程是各自在独立的会话中启动的。）

您可以尝试使用此命令禁用“自动分组”功能，看看它是否会改变您所看到的行为：

echo 0 >/proc/sys/kernel/sched_autogroup_enabled

更多细节请查看 sched(7) 手册页中有关 autogroup 的部分。

Linux：调度进程和线程

答案1

答案2

相关内容