我有一个大约 1000 行的大文件 run_simulation.csh,内容如下:
#!/bin/csh
set config_dir = /proj/ABC/users/nhannguyen/work/verif/qc/input
set testbench_dir = /proj/ABC/users/nhannguyen/work/verif/qc/testbench/TT_p025c
# Each line below hands one testbench script from $testbench_dir/cell to bsub,
# always with the same sim.config.py from $config_dir (-c) and "-m 1".
bsub $testbench_dir/cell/delay_0_0.sp.py -c $config_dir/sim.config.py -m 1
bsub $testbench_dir/cell/delay_0_1.sp.py -c $config_dir/sim.config.py -m 1
bsub $testbench_dir/cell/delay_0_2.sp.py -c $config_dir/sim.config.py -m 1
bsub $testbench_dir/cell/delay_0_3.sp.py -c $config_dir/sim.config.py -m 1
bsub $testbench_dir/cell/delay_1_0.sp.py -c $config_dir/sim.config.py -m 1
bsub $testbench_dir/cell/delay_1_1.sp.py -c $config_dir/sim.config.py -m 1
bsub $testbench_dir/cell/delay_1_2.sp.py -c $config_dir/sim.config.py -m 1
bsub $testbench_dir/cell/delay_1_3.sp.py -c $config_dir/sim.config.py -m 1
...............
我想将此文件拆分为每个 50 行的小文件,并且每个小文件都必须以大文件 run_simulation.csh 的前 3 行(见下)开头。如果可能,这些小文件的扩展名应为 .csh。我该如何用命令完成这种拆分?
#!/bin/csh
set config_dir = /proj/ABC/users/nhannguyen/work/verif/qc/input
set testbench_dir = /proj/ABC/users/nhannguyen/work/verif/qc/testbench/TT_p025c
答案1
#!/bin/bash
# Split a csh job script into pieces of 50 command lines each. Every piece
# starts with a copy of the big file's 3-line header and is written to the
# current directory as split-00.csh, split-01.csh, ...
# Needs GNU split (-d and --additional-suffix).
#
# Usage: ./this_script [bigfile [lines_per_piece]]
#   bigfile          file to split (default: ./bigfile.csh)
#   lines_per_piece  command lines per piece, header not counted (default: 50)
#
# Run it in a directory that holds no older split-*.csh files: step 3 puts
# the header on every file matching that pattern.

split_bigfile() {
  local bigfile=${1:-./bigfile.csh}
  local lines_per_piece=${2:-50}
  local header_lines=3
  local piece

  if [[ ! -r "$bigfile" ]]; then
    printf 'cannot read %s\n' "$bigfile" >&2
    return 1
  fi

  # steps 1 and 2: drop the header and split what is left. Streaming tail
  # into split ("-" = read stdin) avoids leaving an intermediate file behind.
  tail -n "+$((header_lines + 1))" -- "$bigfile" \
    | split -d -l "$lines_per_piece" --additional-suffix=.csh - split- \
    || return 1

  # step 3: put the header back on top of each piece. The header is copied
  # from the big file itself, so it cannot drift from the real one and needs
  # no escaping for a sed replacement.
  for piece in split-*.csh; do
    # An unmatched glob stays literal (e.g. the input had only a header).
    [[ -e "$piece" ]] || continue
    if ! { head -n "$header_lines" -- "$bigfile"; cat -- "$piece"; } > "$piece.tmp"; then
      rm -f -- "$piece.tmp"
      return 1
    fi
    mv -- "$piece.tmp" "$piece" || return 1
  done
}

split_bigfile "$@"
答案2
一个简单的例子,已在 bash 下测试:
#! /bin/bash
# Cut a file into sub files of 50 data lines each; every sub file starts
# with a copy of the input's first 3 lines (the header).
# Usage: ./this_script [file]      (default file: file_to_cut)
# Writes sub_file_1, sub_file_2, ... to the current directory.

split_with_header() {
  # file to split; one name is used for every read of the input
  local input=${1:-file_to_cut}
  # first sub file index
  local j=1
  # first line after header
  local i=4
  # 50 lines per sub file
  local lines=50
  local total_lines

  if [[ ! -r "$input" ]]; then
    printf 'cannot read %s\n' "$input" >&2
    return 1
  fi

  # lines in file to split; awk's NR also counts a last line that has no
  # trailing newline, which "wc -l" would miss
  total_lines=$(awk 'END { print NR }' < "$input") || return 1

  # "<=" rather than "<": a final chunk that starts on the very last line
  # of the input must still be written
  while (( i <= total_lines )); do
    # copy header
    head -n 3 -- "$input" > "sub_file_$j"
    # copy data
    tail -n "+$i" -- "$input" | head -n "$lines" >> "sub_file_$j"
    # prepare next file
    j=$((j + 1))
    # prepare next line to read
    i=$((i + lines))
  done
}

split_with_header "$@"
答案3
假设您想用 bsub 提交每个与模式 "$testbench_dir"/cell/delay_*_*.sp.py 匹配的脚本,那么您可以将脚本替换为以下内容:
#!/bin/sh
# Submit every delay_*_*.sp.py testbench script under $testbench_dir/cell
# to bsub, one job per script, all with the same sim.config.py.
config_dir=/proj/ABC/users/nhannguyen/work/verif/qc/input
testbench_dir=/proj/ABC/users/nhannguyen/work/verif/qc/testbench/TT_p025c

# submit_all
# Reads $testbench_dir and $config_dir; calls bsub once per matching script,
# in the order the glob expands.
submit_all() {
  for py in "$testbench_dir"/cell/delay_*_*.sp.py; do
    # If nothing matches, the pattern is left as a literal string; skip it
    # instead of submitting a job for a file that does not exist.
    [ -e "$py" ] || continue
    bsub "$py" -c "$config_dir/sim.config.py" -m 1
  done
}

submit_all
这是一个 /bin/sh 脚本,而不是 csh 脚本,但这应该不重要。
如果您需要确保脚本按特定顺序执行(上面的循环按字典顺序遍历脚本文件,例如 delay_10_0 会排在 delay_2_0 之前),可以改用双重循环:
#!/bin/sh
# Submit delay_I_J.sp.py to bsub for every I in 0..maxi and every J in
# 0..maxj, counting numerically with J as the fastest-changing index.
config_dir=/proj/ABC/users/nhannguyen/work/verif/qc/input
testbench_dir=/proj/ABC/users/nhannguyen/work/verif/qc/testbench/TT_p025c
maxi=300 # upper bound (inclusive) for I in delay_I_J.sp.py
maxj=3 # upper bound (inclusive) for J in delay_I_J.sp.py

outer=0
while [ "$outer" -le "$maxi" ]; do
  inner=0
  while [ "$inner" -le "$maxj" ]; do
    bsub "${testbench_dir}/cell/delay_${outer}_${inner}.sp.py" -c "${config_dir}/sim.config.py" -m 1
    inner=$(( inner + 1 ))
  done
  outer=$(( outer + 1 ))
done
如果您希望脚本每次只提交一批作业(每批 50 个),并且能够在命令行上指定要提交哪一批,例如
./script 3
(将运行批次 3,即作业 100-149)
#!/bin/sh
# Submit one batch of 50 testbench scripts to bsub.
# Usage: ./script batchnumber
#   batch 1 = jobs 0-49, batch 2 = jobs 50-99, batch 3 = jobs 100-149, ...
config_dir=/proj/ABC/users/nhannguyen/work/verif/qc/input
testbench_dir=/proj/ABC/users/nhannguyen/work/verif/qc/testbench/TT_p025c

# submit_batch BATCH
# Numbers the delay_*_*.sp.py scripts under $testbench_dir/cell from 0 in the
# order the glob expands, and calls bsub for those whose number falls inside
# batch BATCH. Prints the usage line to stderr and returns 1 when BATCH is
# missing, not a plain decimal number, or less than 1.
submit_batch() {
  batch=$1

  # Reject empty and non-numeric values before they reach the arithmetic
  # below, where they would either abort the shell or silently count as 0.
  case $batch in
    '' | *[!0-9]*)
      printf 'Usage: %s batchnumber\n' "$0" >&2
      return 1
      ;;
  esac
  # Batches are numbered from 1; batch 0 would mean "jobs -50 to -1".
  if [ "$batch" -lt 1 ]; then
    printf 'Usage: %s batchnumber\n' "$0" >&2
    return 1
  fi

  bstart=$(( (batch - 1)*50 ))
  bend=$(( batch*50 - 1 ))
  printf 'Submitting batch %d (jobs %d to %d)\n' "$batch" "$bstart" "$bend"

  count=0
  for py in "$testbench_dir"/cell/delay_*_*.sp.py; do
    # An unmatched glob is left as the literal pattern; never submit that.
    [ -e "$py" ] || continue
    # Past the end of the requested batch: nothing more to do.
    if [ "$count" -gt "$bend" ]; then
      break
    fi
    if [ "$count" -ge "$bstart" ]; then
      bsub "$py" -c "$config_dir/sim.config.py" -m 1
    fi
    count=$(( count + 1 ))
  done
}

submit_batch "$@"