如何分割字符串?

如何分割字符串?

我需要将一个字符串拆分成一个或多个子字符串。我知道我可以使用包xstring,但我只想使用内置的 TeX/LaTeX 命令来完成。所以,如果我说

\def\MyTeXKnowledge{Not good enough}

从宏 \MyTeXKnowledge 中提取子字符串“Not”、“good”和“enough”,并将它们存储在变量中,最简单的方法是什么?

答案1

您需要定义一个在参数文本中带有分隔字符的宏:

% \threewords<a> <b> <c>\relax
% The two spaces in the parameter text delimit the first two words;
% \relax closes the third one.
\def\threewords#1 #2 #3\relax{ First: (#1), Second: (#2), Third: (#3) }
% \testthreewords{<text>}
% Unbraces <text> in front of \threewords and appends the \relax end marker.
\def\testthreewords#1{%
    \threewords#1\relax
}
\testthreewords{Now good enough}

如果您希望能够将宏作为参数传入,则需要先将其展开。可以只展开一次(仅把参数的第一个记号展开一次):

% One-step variant: \expandafter expands the first token of #1 once before
% \threewords scans its delimited arguments, so a macro holding the text
% can be passed as the argument.
\def\testthreewords#1{%
    \expandafter\threewords#1\relax
}

或者完全展开:

% Full-expansion variant: \edef expands #1 completely into \@tempa inside a
% group.  The \expandafter chain expands \@tempa before \endgroup is executed,
% so the expanded text survives while the temporary definition stays local.
% \@tempa contains @, so @ must have letter catcode when this is read
% (e.g. after \makeatletter).
\def\testthreewords#1{%
    \begingroup
        \edef\@tempa{#1}%
    \expandafter\endgroup\expandafter\threewords\@tempa\relax
}

此处 \relax 用作结束标记,因此不得出现在参数中;如果参数中可能含有 \relax,则应改用其他宏(如 \@nnil)作为结束标记。添加分组是为了让临时定义保持局部。

但是,如果参数中不包含两个空格,则此设置会失败并出现错误。为了安全起见,您应该单独读取每个子字符串,并将分隔符添加到末尾作为故障保护。然后测试是否已到达末尾:

% \testwords{<text>}
% Fully expands <text>, appends one space so that the last word is also
% space-delimited, and starts the word loop.  The temporary \@tempa is
% defined inside a group and expanded before the group is closed.
% \@tempa, \@firstoftwo and \@secondoftwo contain @, so @ must have letter
% catcode when this is read (e.g. after \makeatletter).
\def\testwords#1{%
    \begingroup
        \edef\@tempa{#1\space}%
    \expandafter\endgroup\expandafter\readwords\@tempa\relax
}
% \readwords<word> <rest>\relax
% Passes <word> to \doword, then either finishes with \endtestwords (when
% <rest> is empty) or calls itself again on <rest>.
\def\readwords#1 #2\relax{%
    \doword{#1}%
    \ifx\relax#2\relax % true only when #2 is empty
        \expandafter\@firstoftwo
    \else
        \expandafter\@secondoftwo
    \fi
    {\endtestwords}% your own end-macro if required
    {\readwords#2\relax}%
}
% Per-word hook: receives one word as #1.
\def\doword#1{(#1)}
% End hook: runs once after the last word.
\def\endtestwords{}


\testwords{Now good enough}% Gives `(Now)(good)(enough)`
\testwords{Now good}% Gives `(Now)(good)`

 

答案2

另一种方式:这些词被储存在宏\worda \wordb等中。

\documentclass[a4paper]{article}  

% Number of words found by the most recent \GetWords call.
\newcount\nbofwords
\makeatletter
% Empty macro used as the reference in the "nothing left" comparison.
\def\myutil@empty{}
% \multiwords<word> <rest>\@nil
% Stores <word> (expanded by \edef) in \worda, \wordb, ... according to the
% running count, then continues with <rest>.
\def\multiwords#1 #2\@nil{%
  \advance\nbofwords\@ne
  \expandafter\edef\csname word\@alph\nbofwords\endcsname{#1}%
  \def\NextArg{#2}%
  \ifx\NextArg\myutil@empty
    % Nothing left: \next now swallows the closing \@nil instead of looping.
    \let\next\@gobble
  \fi
  \next#2\@nil
}
% \GetWords{<text or macro>}
% Resets the counter, expands the first token of #1 once, and appends a
% space so that the last word is space-delimited as well.
\def\GetWords#1{%
  \nbofwords\z@
  \let\next\multiwords
  \expandafter\next#1 \@nil
}
\makeatother

\begin{document}
% The double space in the sample text is read by TeX as a single space token,
% so it marks exactly one word boundary.
\def\MyTeXKnowledge{Not good  enough the end}
\GetWords{\MyTeXKnowledge}

% \nbofwords holds the word count; \worda ... \worde hold the five words.
% Every item is separated by "; " so the printed list is evenly spaced.
There are \the\nbofwords\ words: \worda; \wordb; \wordc; \wordd; \worde.

\end{document}

现在 \GetWords 也可以接受宏(如 \MyTeXKnowledge)作为参数。

答案3

自 2023 年起,还有其他选择。例如使用expl3编程环境:

\documentclass{book}

\ExplSyntaxOn
% \getNth{<string>}{<separator>}{<index>}
% Splits <string> at every <separator> into the scratch sequence \l_tmpa_seq
% and leaves the item at position <index> in the input stream.
% The x-type argument means <string> is fully expanded before splitting, so
% a macro holding the text can be passed.
\NewDocumentCommand \getNth { m m m }
  {
    \seq_set_split:Nnx \l_tmpa_seq {#2} {#1}
    \seq_item:Nn \l_tmpa_seq {#3}
  }
\ExplSyntaxOff

\begin{document}

% sample text kept in a macro; \getNth is given the macro, not the literal words
\def\mywords{first second third last}

% split by spaces and get the first item
\getNth{\mywords}{ }{1}

% split by spaces and get the last item
\getNth{\mywords}{ }{-1}

\end{document}

输出

first
last

或者如果你想将一个函数应用于每个项目:

\documentclass{book}

\ExplSyntaxOn
% Per-item worker.
% #1 is the 1-based index and #2 is the current item.
% If necessary check the index with \int_compare, \int_case, or \bool_case
% before doing something with the item.
\cs_new:Npn \__xyz_myfunction:nn #1#2
  {
    \par #1~#2
  }
% \mapToFunction{<string>}
% Fully expands <string>, splits it at spaces ("~" is a space in expl3
% syntax) and applies \__xyz_myfunction:nn to every (index, item) pair.
\NewDocumentCommand \mapToFunction { m }
  {
    \seq_set_split:Nnx \l_tmpa_seq { ~ } {#1}
    \seq_map_indexed_function:NN \l_tmpa_seq \__xyz_myfunction:nn
  }
\ExplSyntaxOff

\begin{document}

% sample text kept in a macro and passed to \mapToFunction below
\def\mywords{first second third last}

% typesets one "<index> <item>" paragraph per space-separated word
\mapToFunction{\mywords}

\end{document}

输出

1 first
2 second
3 third
4 last

答案4

受到 wolfrevo 的尝试的启发:

\documentclass{article}

\ExplSyntaxOn
% the prefix is `clint' because of the OP's avatar

% \definechunkcontainer*{<name>}[<separator>]{<text>}
% Splits <text> at <separator> and stores the chunks under <name>.
% With the star, <text> is expanded once before splitting (o-type argument),
% so a macro holding the text can be passed.
\NewDocumentCommand{\definechunkcontainer}{s m O{~} m}
 {% #1 = boolean
  % #2 = symbolic name
  % #3 = separator (default a space)
  % #4 = text or control sequence
  \IfBooleanTF { #1 }
   {
    \clint_chunk_define:onn { #4 } { #2 } { #3 }
   }
   {
    \clint_chunk_define:nnn { #4 } { #2 } { #3 }
   }
 }

% \getchunk[<number>]{<name>}
% Expandable: yields chunk <number> of container <name>, or the number of
% chunks when the optional argument is omitted.
\NewExpandableDocumentCommand{\getchunk}{o m}
 {% #1 = chunk number; if omitted we get the number of chunks
  % #2 = symbolic name
  \IfNoValueTF { #1 }
   {
    \seq_count:c { l__clint_chunk_#2_seq }
   }
   {
    \seq_item:cn { l__clint_chunk_#2_seq } { #1 }
   }
 }

% \processchunks{<name>}[<between>]{<template>}
% Applies <template> to every chunk of container <name>.
\NewDocumentCommand{\processchunks}{m o +m}
 {% #1 = symbolic name
  % #2 = optional tokens to be inserted between chunks
  % #3 = template where #1 stands for the chunk number and #2 for the chunk
  \IfNoValueTF { #2 }
   {% easier processing
    \clint_chunk_process:nn { #1 } { #3 }
   }
   {% more complex processing
    \clint_chunk_process:nnn { #1 } { #2 } { #3 }
   }
 }

% Scratch sequence collecting the processed chunks when a separator is used.
\seq_new:N \l__clint_chunk_temp_seq
\cs_generate_variant:Nn \seq_set_split:Nnn { c }
\cs_generate_variant:Nn \seq_map_indexed_function:NN { c }
\cs_generate_variant:Nn \seq_map_indexed_inline:Nn { c }

% The signature has exactly three n-type arguments, matching the three
% arguments every caller supplies.  A fourth `n' would make the function
% absorb one extra token from whatever follows the call.
\cs_new_protected:Nn \clint_chunk_define:nnn
 {% #1 = text to be split
  % #2 = symbolic name
  % #3 = separator
  \seq_clear_new:c { l__clint_chunk_#2_seq }
  \seq_set_split:cnn { l__clint_chunk_#2_seq } { #3 } { #1 }
 }
% o-type variant: the text argument is expanded once before splitting.
\cs_generate_variant:Nn \clint_chunk_define:nnn { o }

\cs_new_protected:Nn \clint_chunk_process:nn
 {% #1 = symbolic name
  % #2 = template; it becomes the body of a two-argument function
  \cs_set:Nn \__clint_chunk_process_do:nn { #2 }
  \seq_map_indexed_function:cN { l__clint_chunk_#1_seq } \__clint_chunk_process_do:nn
 }

\cs_new_protected:Nn \clint_chunk_process:nnn
 {% #1 = symbolic name
  % #2 = tokens inserted between processed chunks
  % #3 = template; it becomes the body of a two-argument function
  \seq_clear:N \l__clint_chunk_temp_seq
  \cs_set:Nn \__clint_chunk_process_do:nn { #3 }
  % Collect the unexpanded template calls first, so that \seq_use:Nn can
  % place #2 between them only.
  \seq_map_indexed_inline:cn { l__clint_chunk_#1_seq }
   {
    \seq_put_right:Nn \l__clint_chunk_temp_seq { \__clint_chunk_process_do:nn { ##1 } { ##2 } }
   }
  \seq_use:Nn \l__clint_chunk_temp_seq { #2 }
 }

\ExplSyntaxOff

\begin{document}

% a couple of containers
% unstarred form: literal text, split at the default separator (a space)
\definechunkcontainer{myTeXknowledge}{not good enough}

\newcommand{\gbu}{The Good -- The Bad -- The Ugly}

% starred form: \gbu is expanded once, then split at `--'
\definechunkcontainer*{movie}[--]{\gbu}

% now let's test

% without the optional argument \getchunk yields the number of chunks
\getchunk{myTeXknowledge} (expected: 3)

\getchunk[2]{myTeXknowledge} (expected: good)

\getchunk[3]{movie} (expected: The Ugly)

% in the template, #1 is the chunk number and #2 the chunk itself
\processchunks{myTeXknowledge}{#1: #2\par}

% the optional argument is inserted between the processed chunks
\processchunks{movie}[/]{#2}

\begin{itemize}
\processchunks{movie}{\item[#1)] #2}
\end{itemize}

\begin{enumerate}
\processchunks{movie}{\item #2}
\end{enumerate}

\end{document}

在此处输入图片描述

相关内容