1 [node18:02644] [[37701,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
2 [node18:02644] [[37701,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
3 [node18:02644] [[37701,0],0] ORTE_ERROR_LOG: File open fail[node18:02645] [[37700,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
4 [node18:02645] [[37700,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
5 [node18:02645] [[37700,0],0] ORTE_ERROR_LOG: File open fail[node18:02633] [[37720,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
6 [node18:02633] [[37720,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
7 [node18:02633] [[37720,0],0] ORTE_ERROR_LOG: File open fail[node18:02635] [[37722,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
8 [node18:02635] [[37722,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
9 [node18:02635] [[37722,0],0] ORTE_ERROR_LOG: File open fail[node18:02646] [[37703,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
10 [node18:02646] [[37703,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
11 [node18:02646] [[37703,0],0] ORTE_ERROR_LOG: File open fail[node18:02643] [[37698,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
12 [node18:02643] [[37698,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
13 [node18:02643] [[37698,0],0] ORTE_ERROR_LOG: File open fail[node18:02647] [[37702,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
14 [node18:02647] [[37702,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
15 [node18:02647] [[37702,0],0] ORTE_ERROR_LOG: File open fail[node18:02637] [[37724,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
16 [node18:02637] [[37724,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
17 [node18:02637] [[37724,0],0] ORTE_ERROR_LOG: File open fail[node18:02641] [[37696,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
18 [node18:02641] [[37696,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
19 [node18:02641] [[37696,0],0] ORTE_ERROR_LOG: File open fail[node18:02636] [[37725,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
20 [node18:02636] [[37725,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
21 [node18:02636] [[37725,0],0] ORTE_ERROR_LOG: File open fail[node18:02634] [[37723,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
22 [node18:02634] [[37723,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
23 [node18:02634] [[37723,0],0] ORTE_ERROR_LOG: File open fail[node18:02640] [[37697,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
24 [node18:02640] [[37697,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
25 [node18:02640] [[37697,0],0] ORTE_ERROR_LOG: File open fail[node18:02638] [[37727,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
26 [node18:02638] [[37727,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
27 [node18:02638] [[37727,0],0] ORTE_ERROR_LOG: File open fail[node18:02648] [[37705,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
28 [node18:02648] [[37705,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
29 [node18:02648] [[37705,0],0] ORTE_ERROR_LOG: File open fail[node18:02632] [[37721,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
30 [node18:02632] [[37721,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
31 [node18:02632] [[37721,0],0] ORTE_ERROR_LOG: File open fail[node18:02642] [[37699,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 142
32 [node18:02642] [[37699,0],0] ORTE_ERROR_LOG: File open failure in file ras_tm_module.c at line 82
33 [node18:02642] [[37699,0],0] ORTE_ERROR_LOG: File open failure in file base/ras_base_allocate.c at line 149
34 [node18:02644] [[37701,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
35 ure in file base/ras_base_allocate.c at line 149
36 [node18:02645] [[37700,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
37 ure in file base/ras_base_allocate.c at line 149
38 [node18:02633] [[37720,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
39 ure in file base/ras_base_allocate.c at line 149
40 [node18:02635] [[37722,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
41 ure in file base/ras_base_allocate.c at line 149
42 [node18:02646] [[37703,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
43 ure in file base/ras_base_allocate.c at line 149
44 [node18:02643] [[37698,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
45 ure in file base/ras_base_allocate.c at line 149
46 [node18:02647] [[37702,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
47 ure in file base/ras_base_allocate.c at line 149
48 [node18:02637] [[37724,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
49 ure in file base/ras_base_allocate.c at line 149
50 [node18:02641] [[37696,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
51 ure in file base/ras_base_allocate.c at line 149
52 [node18:02636] [[37725,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
53 ure in file base/ras_base_allocate.c at line 149
54 [node18:02634] [[37723,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
55 ure in file base/ras_base_allocate.c at line 149
56 [node18:02640] [[37697,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
57 ure in file base/ras_base_allocate.c at line 149
58 [node18:02638] [[37727,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
59 ure in file base/ras_base_allocate.c at line 149
60 [node18:02648] [[37705,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
61 ure in file base/ras_base_allocate.c at line 149
62 [node18:02632] [[37721,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
63 ure in file base/ras_base_allocate.c at line 149
64 [node19:02642] [[37699,0],0] ORTE_ERROR_LOG: File open failure in file orted/orted_main.c at line 574
~
我提交的作业运行结束后,在错误文件中收到了 64 条上述消息。作业完成后,它生成的某个输出文件中本应全部是数值;但实际上,其中一部分是 NaN,只有一部分是正常的数值。
顺便说明一下,我的集群中的节点 18 本身工作正常。每次出现这些消息时,其中的节点号都不一样(几天前是节点 3,当时我尝试重新运行了全部计算)。
我在谷歌上搜索了这个错误,看起来 ORTE_ERROR 与 MPI 软件包有关。我的集群账户中安装的 MPI 版本是 1.6.5。
您认为是我输入的某些值有问题,还是有某些软件包缺失或版本过旧?