如何正确逐行解析 CSV 文件,且每行与另一行之间的逗号/列数不同?

如何正确逐行解析 CSV 文件,且每行与另一行之间的逗号/列数不同?

后续行动这个答案,使用此 csv 文件

\begin{filecontents*}[overwrite]{mydata.csv}
    TeMaxPartLoad,163.0576,,,,
    MaxSpeedTR,1065.9,,,,,
    coeffs, 3, 5, 9 , 8, 6 ,
\end{filecontents*}

我得到了预期的输出

在此处输入图片描述

然而,改变第一行的逗号数量会导致输出错误

\begin{filecontents*}[overwrite]{mydata.csv}
    TeMaxPartLoad,163.0576,
    MaxSpeedTR,1065.9,,,,,
    coeffs, 3, 5, 9 , 8, 6 ,
\end{filecontents*}

在此处输入图片描述

那么,如何纠正这种不良行为?

\begin{filecontents*}[overwrite]{mydata.csv}
    TeMaxPartLoad,163.0576,
    MaxSpeedTR,1065.9,,,,,
    coeffs, 3, 5, 9 , 8, 6 ,
\end{filecontents*}

\documentclass[12pt,a4paper]{article}

\ExplSyntaxOn
% Reading the file (based on <https://tex.stackexchange.com/a/575055/73317>)
\ior_new:N \l__diaa_csv_ior
\bool_new:N \l__diaa_csv_str_bool
\seq_new:N \l__diaa_csv_tmp_seq

% str mode (bool/star), key column, label, value columns, file
\NewDocumentCommand \ReadCSV { s O{1} m O{} m }
{
    \IfBooleanTF {#1}
    { \bool_set_true:N \l__diaa_csv_str_bool }
    { \bool_set_false:N \l__diaa_csv_str_bool }
    \diaa_csv_read:nnnn {#3} {#2} {#4} {#5}
}

% label, key column, value columns, file
\cs_new_protected:Npn \diaa_csv_read:nnnn #1 #2 #3 #4
{
    \tl_if_blank:nTF {#3}       % Detect number of columns and use 2 to last
    {
        \ior_open:NnTF \l__diaa_csv_ior {#4}
        {
            \bool_if:NTF \l__diaa_csv_str_bool
            { \ior_str_get:NN }
            { \ior_get:NN }
            \l__diaa_csv_ior \l_tmpa_tl
            
            \ior_close:N \l__diaa_csv_ior
            \seq_set_split:NnV \l_tmpa_seq { , } \l_tmpa_tl
            \seq_clear:N \l__diaa_csv_tmp_seq
            \int_step_inline:nnn { 2 } { \seq_count:N \l_tmpa_seq }
            { \seq_put_right:Nn \l__diaa_csv_tmp_seq {##1} }
        }
        { \msg_error:nnn { diaa } { file-not-found } {#4} }
    }
    { \seq_set_split:Nnn \l__diaa_csv_tmp_seq { , } {#3} } % explicit columns
    
    \ior_open:NnTF \l__diaa_csv_ior {#4}
    {
        \prop_new:c { g__diaa_csv_#1_prop }
        \__diaa_csv_read:nn {#1} {#2}
        \ior_close:N \l__diaa_csv_ior
    }
    { \msg_error:nnn { diaa } { file-not-found } {#4} }
}

\msg_new:nnn { diaa } { file-not-found }
{ File~`#1'~not~found. }

\cs_generate_variant:Nn \prop_put:Nnn { cxV }

% label, key column
\cs_new_protected:Npn \__diaa_csv_read:nn #1 #2
{
    \bool_if:NTF \l__diaa_csv_str_bool
    { \ior_str_map_inline:Nn }
    { \ior_map_inline:Nn }
    \l__diaa_csv_ior
    {
        \seq_set_split:Nnn \l_tmpa_seq { , } {##1} % split one CSV row
        \tl_clear:N \l_tmpa_tl
        \seq_map_inline:Nn \l__diaa_csv_tmp_seq
        {
            \tl_put_right:Nx \l_tmpa_tl { { \seq_item:Nn \l_tmpa_seq {####1} } }
        }
        
        \prop_put:cxV { g__diaa_csv_#1_prop }
        { \seq_item:Nn \l_tmpa_seq {#2} }
        \l_tmpa_tl
    }
}

\keys_define:nn { diaa / getPolyFromRow }
{
    global-assignment .bool_set:N = \l__diaa_gpfr_global_assignment_bool,
    global-assignment .default:n = true,
    global-assignment .initial:n = false,
    variable .tl_set:N = \l__diaa_gpfr_variable_name_tl,
    variable .value_required:n = true,
    variable .initial:n = X,
    typographical-variant .str_set:N = \l__diaa_gpfr_typographical_variant_str,
    typographical-variant .value_required:n = true,
    typographical-variant .initial:n = default,
}

% \getPolyFromRow will temporarily store the result in this variable. This
% allows us not to lose the result when the group started for \keys_set:nn
% ends (if the user chose to perform a local assignment, this must be done
% after closing that group).
\tl_new:N \g__diaa_gpfr_result_tl

% Options, macro for result, key, datafile label
\NewDocumentCommand \getPolyFromRow { O{} m m m }
{
    \group_begin:
    \keys_set:nn { diaa / getPolyFromRow } {#1}
    
    % Globally define _gfunc function aliases that perform global or local
    % assignments depending on \l__diaa_gpfr_global_assignment_bool. They
    % will be used *after* we close the current group.
    \bool_set_true:N \l__diaa_gpfr_dtfa_global_aliases_bool
    \__diaa_gpfr_define_tl_func_aliases:
    
    % Store the result in \g__diaa_gpfr_result_tl for now.
    \bool_set_true:N \l__diaa_gpfr_global_assignment_bool
    \diaa_get_poly_from_row:Nnn \g__diaa_gpfr_result_tl {#3} {#4}
    \group_end:
    
    % Use the globally-defined aliases to perform the user-chosen (local or
    % global) kind of assignment.
    \__diaa_clear_gfunc:N #2    % make sure the tl var #2 is defined
    \__diaa_set_eq_gfunc:NN #2 \g__diaa_gpfr_result_tl % set it
}

% True to globally define the aliases and give them _gfunc names rather than
% _func
\bool_new:N \l__diaa_gpfr_dtfa_global_aliases_bool

\cs_new_protected:Npn \__diaa_gpfr_define_tl_func_aliases:
{
    \bool_if:NTF \l__diaa_gpfr_global_assignment_bool
    {
        \__diaa_gpfr_define_alias:nnN { clear }     { N }  \tl_gclear_new:N
        \__diaa_gpfr_define_alias:nnN { set_eq }    { NN } \tl_gset_eq:NN
        \__diaa_gpfr_define_alias:nnN { put_right } { Nn } \tl_gput_right:Nn
    }
    {
        \__diaa_gpfr_define_alias:nnN { clear }     { N }  \tl_clear_new:N
        \__diaa_gpfr_define_alias:nnN { set_eq }    { NN } \tl_set_eq:NN
        \__diaa_gpfr_define_alias:nnN { put_right } { Nn } \tl_put_right:Nn
    }
}

% Locally or globally define an alias for a function. The alias is defined
% globally with a gfunc name if \l__diaa_gpfr_dtfa_global_aliases_bool is true.
%
% #1: stem such as “clear”, “put_right”, etc.
% #2: signature of the alias (e.g., “Nn”)
\cs_new_protected:Npn \__diaa_gpfr_define_alias:nnN #1#2
{
    \bool_if:NTF \l__diaa_gpfr_dtfa_global_aliases_bool
    { \cs_gset_eq:cN }
    { \cs_set_eq:cN }
    {
        __diaa_#1_
        \bool_if:NT \l__diaa_gpfr_dtfa_global_aliases_bool { g }
        func:#2
    }
}

\int_new:N \l__diaa_gpfr_degree_int
\tl_new:N \l__dia_gpfr_row_values_tl
\cs_generate_variant:Nn \__diaa_put_right_func:Nn { Nx }
\cs_generate_variant:Nn \__diaa_get_poly_from_row_append_monomial:Nnn { NnV }

% Macro for result, key, datafile label
\cs_new_protected:Npn \diaa_get_poly_from_row:Nnn #1#2#3
{
    % Locally define function aliases that perform global or local assignments
    % depending on \l__diaa_gpfr_global_assignment_bool: \__diaa_clear_func:Nn,
    % \__diaa_put_right_func:Nn, etc.
    \bool_set_false:N \l__diaa_gpfr_dtfa_global_aliases_bool
    \__diaa_gpfr_define_tl_func_aliases:
    
    % Retrieve the coefficients
    \prop_get:cnN { g__diaa_csv_#3_prop } {#2} \l__dia_gpfr_row_values_tl
    
    % Let's put (1 + degree) for now in this int variable.
    \int_zero:N \l__diaa_gpfr_degree_int
    \tl_map_inline:Nn \l__dia_gpfr_row_values_tl
    {
        \tl_if_empty:nT {##1} { \tl_map_break: }
        \int_incr:N \l__diaa_gpfr_degree_int
    }
    
    \__diaa_clear_func:N #1        % initialize #1 as a tl var if necessary
    \bool_set_false:N \l_tmpa_bool % true: add + operator if next coeff is > 0
    
    \tl_map_inline:Nn \l__dia_gpfr_row_values_tl % loop over the coefficients
    {
        % Degree of the monomial we're about to output
        \int_decr:N \l__diaa_gpfr_degree_int
        % Early termination condition if the row is not full of coefficients
        \int_compare:nNnT { \l__diaa_gpfr_degree_int } < { 0 }
        { \tl_map_break: }
        
        \fp_compare:nNnF {##1} = { 0 }
        {
            % Insert a + operator if necessary
            \bool_if:NTF \l_tmpa_bool
            {
                \fp_compare:nNnT {##1} > { 0 }
                { \__diaa_put_right_func:Nn #1 { + } }
            }
            { \bool_set_true:N \l_tmpa_bool }
            
            % Insert the monomial
            \__diaa_get_poly_from_row_append_monomial:NnV #1 {##1}
            \l__diaa_gpfr_variable_name_tl
        }
    }
}

\msg_new:nnn { diaa } { gpfr-unknown-typo-variant }
{ Unknown~typographical~variant~for~\token_to_str:N \getPolyFromRow :~`#1'. }

\cs_generate_variant:Nn \msg_error:nnn { nnV }

% Macro, coefficient, variable name
\cs_new_protected:Npn \__diaa_get_poly_from_row_append_monomial:Nnn #1#2#3
{
    \str_case_e:nnF { \l__diaa_gpfr_typographical_variant_str }
    {
        { default }     { \tl_set:Nn \l_tmpa_tl { #2 \times #3 ^ } }
        { with-braces } { \tl_set:Nn \l_tmpa_tl { #2 \times {#3} ^ } }
    }
    {
        \msg_error:nnV { diaa } { gpfr-unknown-typo-variant }
        \l__diaa_gpfr_typographical_variant_str
    }
    
    \__diaa_put_right_func:Nx #1
    {
        \int_case:nnF { \l__diaa_gpfr_degree_int } % depending on the degree...
        {
            { 0 } { \exp_not:n {#2} } % degree 0 → only the coefficient
            { 1 } { \exp_not:n { #2 \times #3 } }
        }
        { % Other degrees
            \exp_not:V \l_tmpa_tl % use the selected variant
            % Use braces in case the exponent has several digits
            { \int_use:N \l__diaa_gpfr_degree_int }
        }
    }
}

\ExplSyntaxOff

\ReadCSV{DrivingData}{mydata.csv}

\getPolyFromRow{\eq}{coeffs}{DrivingData}

\begin{document}
    $\eq$
\end{document}

答案1

我知道你有兴趣扩展当前的代码,但作为替代方案,这里有一个带有的版本xstring。这个想法是逐字读取文件(使用catchfile包),然后处理最后一行,分离第一个系数,打印它,然后递归地继续处理该行的其余部分。

这要求:1. 包含系数的行是 csv 文件中的最后一行;2. 系数行已标记系数3. 单词之间没有空格系数和第一个逗号。这三个要求都可以通过更多的代码来解决,但这只是一个概念证明。

\documentclass{article}
\usepackage{catchfile}
\usepackage{xstring}

\def\printeq#1{%
% count comma's (=number of coefficients-1)
\StrCount{#1}{,}[\ncomma]%
% final coefficient, print as is
\ifnum\ncomma=0\relax#1\else%
   % else split current coefficient from the rest of the line
   \StrCut{#1}{,}{\coeff}{\newcoeffs}
   % 2nd last, don't print power
   \ifnum\ncomma=1\relax\coeff\times X+\else%
      % else print power
      \coeff\times X^{\ncomma}+%
   \fi%
   % recursive call if not final coefficient
   \printeq{\newcoeffs}%
\fi%
}

\begin{document}
% read csv file verbatim
\makeatletter
\CatchFileDef{\fulltext}{coeffdata.csv}{\let\do\@makeother\dospecials}
\makeatother
% find final line that starts with "coeffs,"
\StrBehind*{\fulltext}{coeffs,}[\coeffs]
% remove all spaces
\StrDel{\coeffs}{ }[\coeffs]
% remove final comma if present
\IfEndWith{\coeffs}{,}{\StrGobbleRight{\coeffs}{1}[\coeffs]}{}
% print equation recursively
$\printeq{\coeffs}$

\end{document}

结果和问题中第一个截图一样。

相关内容