在 Unix shell 中提取子字符串的最简单方法？

Question 1

cut可能会有用：

$ echo hello | cut -c1,3
hl
$ echo hello | cut -c1-3
hel
$ echo hello | cut -c1-4
hell
$ echo hello | cut -c4-5
lo

Shell Builtins 也适用于此，这里有一个示例脚本：

#!/bin/bash
# Demonstrates the shell's built-in ability to split strings using
# parameter expansion.  Saves on using sed and awk in shell scripts,
# which can help performance.

# Treat expansion of an unset variable as an error.  Note that
# "shopt -o nounset" without -s only reports the option's state; -s is
# required to actually turn it on.
shopt -s -o nounset
declare -rx       FILENAME=payroll_2007-06-12.txt

# Splits
#   ${var%pattern}   strips the shortest match of pattern from the end
#   ${var#pattern}   strips the shortest match of pattern from the start
#   ${var##pattern}  strips the longest match of pattern from the start
declare -rx   NAME_PORTION=${FILENAME%.*}     # Left of last .
declare -rx      EXTENSION=${FILENAME#*.}     # Right of first .
declare -rx           NAME=${NAME_PORTION%_*} # Left of last _
declare -rx           DATE=${NAME_PORTION#*_} # Right of first _
declare -rx     YEAR_MONTH=${DATE%-*}         # Left of last -
declare -rx           YEAR=${YEAR_MONTH%-*}   # Left of last -
declare -rx          MONTH=${YEAR_MONTH#*-}   # Right of first -
declare -rx            DAY=${DATE##*-}        # Right of last -

clear

echo "  Variable: (${FILENAME})"
echo "  Filename: (${NAME_PORTION})"
echo " Extension: (${EXTENSION})"
echo "      Name: (${NAME})"
echo "      Date: (${DATE})"
echo "Year/Month: (${YEAR_MONTH})"
echo "      Year: (${YEAR})"
echo "     Month: (${MONTH})"
echo "       Day: (${DAY})"

输出：

  Variable: (payroll_2007-06-12.txt)
  Filename: (payroll_2007-06-12)
 Extension: (txt)
      Name: (payroll)
      Date: (2007-06-12)
Year/Month: (2007-06)
      Year: (2007)
     Month: (06)
       Day: (12)

并且根据上面的 Gnudif，当事情变得非常艰难时，总会有 sed/awk/perl。

Answer

cut可能会有用：

$ echo hello | cut -c1,3
hl
$ echo hello | cut -c1-3
hel
$ echo hello | cut -c1-4
hell
$ echo hello | cut -c4-5
lo

Shell Builtins 也适用于此，这里有一个示例脚本：

#!/bin/bash
# Demonstrates the shell's built-in ability to split strings using
# parameter expansion.  Saves on using sed and awk in shell scripts,
# which can help performance.

# Treat expansion of an unset variable as an error.  Note that
# "shopt -o nounset" without -s only reports the option's state; -s is
# required to actually turn it on.
shopt -s -o nounset
declare -rx       FILENAME=payroll_2007-06-12.txt

# Splits
#   ${var%pattern}   strips the shortest match of pattern from the end
#   ${var#pattern}   strips the shortest match of pattern from the start
#   ${var##pattern}  strips the longest match of pattern from the start
declare -rx   NAME_PORTION=${FILENAME%.*}     # Left of last .
declare -rx      EXTENSION=${FILENAME#*.}     # Right of first .
declare -rx           NAME=${NAME_PORTION%_*} # Left of last _
declare -rx           DATE=${NAME_PORTION#*_} # Right of first _
declare -rx     YEAR_MONTH=${DATE%-*}         # Left of last -
declare -rx           YEAR=${YEAR_MONTH%-*}   # Left of last -
declare -rx          MONTH=${YEAR_MONTH#*-}   # Right of first -
declare -rx            DAY=${DATE##*-}        # Right of last -

clear

echo "  Variable: (${FILENAME})"
echo "  Filename: (${NAME_PORTION})"
echo " Extension: (${EXTENSION})"
echo "      Name: (${NAME})"
echo "      Date: (${DATE})"
echo "Year/Month: (${YEAR_MONTH})"
echo "      Year: (${YEAR})"
echo "     Month: (${MONTH})"
echo "       Day: (${DAY})"

输出：

  Variable: (payroll_2007-06-12.txt)
  Filename: (payroll_2007-06-12)
 Extension: (txt)
      Name: (payroll)
      Date: (2007-06-12)
Year/Month: (2007-06)
      Year: (2007)
     Month: (06)
       Day: (12)

并且根据上面的 Gnudif，当事情变得非常艰难时，总会有 sed/awk/perl。

Question 2

Unix shell 传统上不内置正则表达式支持。Bash 和 Zsh 都有，因此如果您使用=~运算符将字符串与正则表达式进行比较，则：

在 bash 中，您可以从 $BASH_REMATCH 数组中获取子字符串。

在 Zsh 中，如果设置了 BASH_REMATCH shell 选项，则值位于 $BASH_REMATCH 数组中，否则位于绑定在一起的 $MATCH/$match 变量对中（一个是标量，另一个是数组）。如果设置了 RE_MATCH_PCRE 选项，则使用 PCRE 引擎；否则使用系统正则表达式库，按扩展正则表达式语法进行匹配，与 bash 相同。

因此，最简单的方法是：如果你使用 bash：

# Match $variable against an extended regular expression.  The pattern
# must stay unquoted: a quoted pattern is matched as a literal string.
# BASH_REMATCH[0] holds the whole match and BASH_REMATCH[n] the text
# matched by the n-th parenthesized group, so the pattern needs a (...)
# group for index 1 to hold anything.
if [[ "$variable" =~ unquoted(.*)regex ]]; then
  matched_portion="${BASH_REMATCH[0]}"
  first_substring="${BASH_REMATCH[1]}"
fi

如果您不使用 Bash 或 Zsh，情况会变得更加复杂，因为您需要使用外部命令。

Answer

Unix shell 传统上不内置正则表达式支持。Bash 和 Zsh 都有，因此如果您使用=~运算符将字符串与正则表达式进行比较，则：

在 bash 中，您可以从 $BASH_REMATCH 数组中获取子字符串。

在 Zsh 中，如果设置了 BASH_REMATCH shell 选项，则值位于 $BASH_REMATCH 数组中，否则位于绑定在一起的 $MATCH/$match 变量对中（一个是标量，另一个是数组）。如果设置了 RE_MATCH_PCRE 选项，则使用 PCRE 引擎；否则使用系统正则表达式库，按扩展正则表达式语法进行匹配，与 bash 相同。

因此，最简单的方法是：如果你使用 bash：

# Match $variable against an extended regular expression.  The pattern
# must stay unquoted: a quoted pattern is matched as a literal string.
# BASH_REMATCH[0] holds the whole match and BASH_REMATCH[n] the text
# matched by the n-th parenthesized group, so the pattern needs a (...)
# group for index 1 to hold anything.
if [[ "$variable" =~ unquoted(.*)regex ]]; then
  matched_portion="${BASH_REMATCH[0]}"
  first_substring="${BASH_REMATCH[1]}"
fi

如果您不使用 Bash 或 Zsh，情况会变得更加复杂，因为您需要使用外部命令。

Question 3

更新答案-适用于（所有？）兼容 POSIX 的 expr

也考虑一下/usr/bin/expr。

$ expr hello : '.\(...\)'
ell

$ expr hello : 'hel'
3

$ expr hello : '.*el'
3

$ expr hello : '.*\(el\)'
el

原始答案-适用于 GNU expr

也考虑一下/usr/bin/expr。

$ expr substr hello 2 3
ell

您还可以将模式与字符串的开头进行匹配。

$ expr match hello h
1

$ expr match hello hell
4

$ expr match hello e
0

$ expr match hello 'h.*o'
5

$ expr match hello 'h.*l'
4

$ expr match hello 'h.*e'
2

Answer

更新答案-适用于（所有？）兼容 POSIX 的 expr

也考虑一下/usr/bin/expr。

$ expr hello : '.\(...\)'
ell

$ expr hello : 'hel'
3

$ expr hello : '.*el'
3

$ expr hello : '.*\(el\)'
el

原始答案-适用于 GNU expr

也考虑一下/usr/bin/expr。

$ expr substr hello 2 3
ell

您还可以将模式与字符串的开头进行匹配。

$ expr match hello h
1

$ expr match hello hell
4

$ expr match hello e
0

$ expr match hello 'h.*o'
5

$ expr match hello 'h.*l'
4

$ expr match hello 'h.*e'
2

Question 4

# Print the part of a string that lies between two 0-based, inclusive
# character indices.
#
# $1 string
# $2 index of first char
# $3 index of last char
#
# The exit status is that of expr, which is non-zero when the result is
# empty (for instance when the range does not fit inside the string).
substr()
{
    # The leading "X" on both the string and the pattern stops expr from
    # interpreting the string itself as an operator or keyword (such as
    # "length", "match" or "(").  The pattern skips $2 characters, then
    # captures the next $3 - $2 + 1 characters.
    expr "X$1" : "X.\\{$2\\}\\(.\\{$(( $3 - $2 + 1 ))\\}\\)"
}

使用示例：

# Pull three pieces out of the sample string by 0-based inclusive ranges.
string="Hello World!"
s1=$(substr "$string" 0 4)
s2=$(substr "$string" 6 10)
s3=$(substr "$string" 11 11)

# Print each extracted piece on its own line.
printf '%s\n' "$s1" "$s2" "$s3"

输出：

Hello
World
!

Answer

# Print the part of a string that lies between two 0-based, inclusive
# character indices.
#
# $1 string
# $2 index of first char
# $3 index of last char
#
# The exit status is that of expr, which is non-zero when the result is
# empty (for instance when the range does not fit inside the string).
substr()
{
    # The leading "X" on both the string and the pattern stops expr from
    # interpreting the string itself as an operator or keyword (such as
    # "length", "match" or "(").  The pattern skips $2 characters, then
    # captures the next $3 - $2 + 1 characters.
    expr "X$1" : "X.\\{$2\\}\\(.\\{$(( $3 - $2 + 1 ))\\}\\)"
}

使用示例：

# Pull three pieces out of the sample string by 0-based inclusive ranges.
string="Hello World!"
s1=$(substr "$string" 0 4)
s2=$(substr "$string" 6 10)
s3=$(substr "$string" 11 11)

# Print each extracted piece on its own line.
printf '%s\n' "$s1" "$s2" "$s3"

输出：

Hello
World
!

在 Unix shell 中提取子字符串的最简单方法？

答案1

答案2

答案3

答案4

相关内容