之后 ...

之后 ...

我有以下代码

\documentclass{article}
\usepackage[utf8]{inputenc}
\usepackage{pgfplotstable}
\usepackage{pgfplots}
\pgfplotsset{compat=1.11}

% Inline sample data. The first column (x1) is used as the x coordinate of
% every plot below; y1 and y2 are the two data series.
\pgfplotstableread{
x1 y1 y2
0   1   1
1   2   4
2   3   9
3   4   16
4   5   25
5   6   36
6   7   49
7   8   64
8   9   81
9   10  100
10  10  100
}\tableLabel

\begin{document}

\begin{tikzpicture}
\begin{axis}[
legend pos = north west,
title = Cool Graph,
xlabel ={Cool X},
ylabel ={Cool Y},
grid = major,
]
% The legend is built with one \addlegendentry directly after each \addplot.
% The former "legend entries = {best fit, y1, y2}" axis option was dropped:
% it competed with the \addlegendentry calls, and only two of the three
% plots had an \addlegendentry of their own.

% Plot 1: least-squares line fitted to column y2. The legend shows the
% fitted slope and the signed intercept reported by pgfplotstable.
\addplot table [x index=0, y = {create col/linear regression={y=y2}}]     {\tableLabel};
\addlegendentry{%
    $\pgfmathprintnumber{\pgfplotstableregressiona} \cdot x
    \pgfmathprintnumber[print sign]{\pgfplotstableregressionb}$ lin.     Regression} %

% Plot 2: the data entries in column y1.
\addplot table [x index=0, y = y1]{\tableLabel};
\addlegendentry{y1}

% Plot 3: the data entries in column y2 (addressed by column index).
\addplot table [x index=0, y index=2]{\tableLabel};
\addlegendentry{y2}
%\addplot table[row sep=\\, y={create col/linear regression={y=Y}}]

\end{axis}
\end{tikzpicture}
\end{document}

我还希望在图表上显示 $R^2$ 值(最小二乘回归的决定系数),可以吗?

答案1

这需要一点工作量,但下面是一种可行的方法。$R^2$ 的计算方法是:先求总平方和 $SS_{\mathrm{tot}}$ 和残差平方和 $SS_{\mathrm{res}}$,然后取 $R^2 = 1 - SS_{\mathrm{res}}/SS_{\mathrm{tot}}$,参见 https://en.wikipedia.org/wiki/Coefficient_of_determination#Definitions 。

例如,可以把这些步骤全部封装到一个宏里,在需要做多次回归时会方便得多,不过这部分留到后面再讲。

输出

\documentclass{article}
% pgfplotstable pulls in pgfplots itself, so a single \usepackage suffices.
\usepackage{pgfplotstable}
\pgfplotsset{compat=1.11}

% Sample data, stored under the name \tableLabel.
% Columns: x1 (x values), y1 and y2 (two data series).
\pgfplotstableread{
x1  y1  y2
0   1   1
1   2   4
2   3   9
3   4   16
4   5   25
5   6   36
6   7   49
7   8   64
8   9   81
9   10  100
10  10  100
}\tableLabel

% Computes R^2 = 1 - SS_res/SS_tot for a linear fit of y2 over x1.
% All intermediate results are stored as extra columns of \tableLabel; the
% final values end up in \slope, \intercept and \Rsquared.

% Step 1: fit the line and store the fitted values in a new column "regression".
\pgfplotstablecreatecol[linear regression={x=x1,y=y2}]{regression}{\tableLabel}
% Copy slope and intercept into macros of our own for use in the legend.
\edef\slope{\pgfplotstableregressiona}
\edef\intercept{\pgfplotstableregressionb}

% Step 2: cumulative sum of y2 in column "cumy2"; its last row is read
% below as the total sum.
\pgfplotstablecreatecol[create col/expr={\pgfmathaccuma+\thisrow{y2}}]{cumy2}{\tableLabel}

% Index of the last row = (number of rows) - 1.
\pgfplotstablegetrowsof{\tableLabel}
\pgfmathtruncatemacro{\lastrow}{\pgfplotsretval-1}

% Read the total sum of y2 from the last row of "cumy2" (result in \pgfplotsretval).
\pgfplotstablegetelem{\lastrow}{cumy2}\of\tableLabel

% Mean of y2 = total / number of rows. Must directly follow the lookup
% above, because it consumes \pgfplotsretval.
\pgfmathsetmacro{\yMean}{\pgfplotsretval/(\lastrow+1)}

% Step 3: per-row squared residual (y2 - fitted value)^2 in column "residuals".
\pgfplotstablecreatecol[
  create col/assign/.code={
       \pgfmathparse{(\thisrow{y2}-\thisrow{regression})^2}
       \edef\entry{\pgfmathresult}
       \pgfkeyslet{/pgfplots/table/create col/next content}\entry
    }
]{residuals}{\tableLabel}
% Per-row squared deviation from the mean (y2 - mean)^2 in column "diffmean".
\pgfplotstablecreatecol[
  create col/assign/.code={
       \pgfmathparse{(\thisrow{y2}-\yMean)^2}
       \edef\entry{\pgfmathresult}
       \pgfkeyslet{/pgfplots/table/create col/next content}\entry
    }
]{diffmean}{\tableLabel}

% Step 4: cumulative sums of both columns; the last row of each is read
% below as SS_res and SS_tot.
\pgfplotstablecreatecol[create col/expr={\pgfmathaccuma+\thisrow{residuals}}]{sumres}{\tableLabel}
\pgfplotstablecreatecol[create col/expr={\pgfmathaccuma+\thisrow{diffmean}}]{sumdiff}{\tableLabel}

% Extract SS_res (residual sum of squares) and SS_tot (total sum of squares)
% from the last row of the cumulative columns.
\pgfplotstablegetelem{\lastrow}{sumres}\of\tableLabel
\pgfmathsetmacro{\SSres}{\pgfplotsretval}
\pgfplotstablegetelem{\lastrow}{sumdiff}\of\tableLabel
\pgfmathsetmacro{\SStot}{\pgfplotsretval}

% Step 5: coefficient of determination.
% NOTE(review): divides by \SStot, which is zero if y2 is constant.
\pgfmathsetmacro{\Rsquared}{1-\SSres/\SStot}
\begin{document}

\begin{tikzpicture}
\begin{axis}[
legend pos = north west,
title = Cool Graph,
xlabel ={Cool X},
ylabel ={Cool Y},
grid = major,
]
% The legend is built with one \addlegendentry directly after each \addplot.
% The former "legend entries = {best fit, y1, y2}" axis option was dropped:
% it listed three labels for two plots (including "y1" for a plot that
% shows y2) and competed with the \addlegendentry calls.

% Plot 1: the fitted line, read from the precomputed "regression" column.
% All three numbers go through \pgfmathprintnumber so that slope, intercept
% and R^2 are rounded and formatted the same way.
\addplot table [x index=0, y = regression]     {\tableLabel};
\addlegendentry{%
    $\pgfmathprintnumber{\slope} \cdot x
    \pgfmathprintnumber[print sign]{\intercept}$,
    $R^2 = \pgfmathprintnumber{\Rsquared}$}

% Plot 2: the raw y2 data the line was fitted to.
\addplot table [x index=0, y index=2]{\tableLabel};
\addlegendentry{y2}

\end{axis}
\end{tikzpicture}
\end{document}

之后 ...

下面是一个非常基本的宏实现,用法如下:

\MakeRegression{x1}{y1}{reg1}{\SlA}{\IntA}{\RsqA}

前三个参数是列名,分别是要使用的 x 数据列、y 数据列,以及新建的回归列。后三个参数是用来保存斜率、截距和 $R^2$ 的宏。另外还有一个可选参数用于指定原始表的名称,默认值为 \tableLabel,因为这恰好是本例中使用的表名。我确信这个实现还有改进的余地。

我用了安斯康姆四重奏作为示例数据。

安斯康姆四重奏

\documentclass[border=5mm]{standalone}
% pgfplotstable pulls in pgfplots itself; groupplots provides the 2x2 layout.
\usepackage{pgfplotstable}
\usepgfplotslibrary{groupplots}
\pgfplotsset{compat=1.11}

% Anscombe's quartet: four (x, y) data sets stored side by side as the
% column pairs (x1,y1) ... (x4,y4) of the table \tableLabel.
\pgfplotstableread{
x1    y1     x2    y2    x3    y3     x4    y4
10.0  8.04   10.0  9.14  10.0  7.46   8.0   6.58
8.0   6.95   8.0   8.14  8.0   6.77   8.0   5.76
13.0  7.58   13.0  8.74  13.0  12.74  8.0   7.71
9.0   8.81   9.0   8.77  9.0   7.11   8.0   8.84
11.0  8.33   11.0  9.26  11.0  7.81   8.0   8.47
14.0  9.96   14.0  8.10  14.0  8.84   8.0   7.04
6.0   7.24   6.0   6.13  6.0   6.08   8.0   5.25
4.0   4.26   4.0   3.10  4.0   5.39   19.0  12.50
12.0  10.84  12.0  9.13  12.0  8.15   8.0   5.56
7.0   4.82   7.0   7.26  7.0   6.42   8.0   7.91
5.0   5.68   5.0   4.74  5.0   5.73   8.0   6.89
}\tableLabel


% \MakeRegression[<table>]{<x col>}{<y col>}{<fit col>}{<slope macro>}{<intercept macro>}{<R^2 macro>}
%
% Fits a least-squares line to column <y col> over column <x col> of <table>,
% appends the fitted values to <table> as column <fit col>, and stores slope,
% intercept and R^2 = 1 - SS_res/SS_tot in the three given macros.
%
% The optional <table> argument defaults to \tableLabel. It is now used
% throughout the body; previously \tableLabel was hard-coded, so passing a
% different table had no effect.
%
% Side effects: (re)creates the helper columns tmp, residuals, diffmean,
% sumres and sumdiff in <table>, and sets \lastrow, \yMean, \SSres, \SStot.
% Every statement line ends in % so that no space token follows it; this
% matters in particular after "\of#1".
\newcommand\MakeRegression[7][\tableLabel]{%
    % #1: original datatable (optional, default \tableLabel)
    % #2: x-column
    % #3: y-column
    % #4: name of new regression column
    % #5: macro to store slope in
    % #6: macro to store intercept in
    % #7: macro to store R squared
    %
    % create the regression column:
    \pgfplotstablecreatecol[linear regression={x=#2,y=#3}]{#4}{#1}%
    % store slope and intercept
    \edef#5{\pgfplotstableregressiona}%
    \edef#6{\pgfplotstableregressionb}%
    %
    % cumulative sum of y; the last row is read below as the total
    \pgfplotstablecreatecol[create col/expr={\pgfmathaccuma+\thisrow{#3}}]{tmp}{#1}%
    %
    % index of the last row = number of rows - 1
    \pgfplotstablegetrowsof{#1}%
    \pgfmathtruncatemacro{\lastrow}{\pgfplotsretval-1}%
    %
    % get total sum of y (result in \pgfplotsretval)
    \pgfplotstablegetelem{\lastrow}{tmp}\of#1%
    %
    % mean of y; must directly follow the lookup above
    \pgfmathsetmacro{\yMean}{\pgfplotsretval/(\lastrow+1)}%
    %
    % per-row squared residual (y - fitted value)^2
    \pgfplotstablecreatecol[
      create col/assign/.code={
           \pgfmathparse{(\thisrow{#3}-\thisrow{#4})^2}
           \edef\entry{\pgfmathresult}
           \pgfkeyslet{/pgfplots/table/create col/next content}\entry
        }
    ]{residuals}{#1}%
    % per-row squared deviation from the mean (y - mean)^2
    \pgfplotstablecreatecol[
      create col/assign/.code={
           \pgfmathparse{(\thisrow{#3}-\yMean)^2}
           \edef\entry{\pgfmathresult}
           \pgfkeyslet{/pgfplots/table/create col/next content}\entry
        }
    ]{diffmean}{#1}%
    %
    % cumulative sums of residuals and of deviations from the mean
    \pgfplotstablecreatecol[create col/expr={\pgfmathaccuma+\thisrow{residuals}}]{sumres}{#1}%
    \pgfplotstablecreatecol[create col/expr={\pgfmathaccuma+\thisrow{diffmean}}]{sumdiff}{#1}%
    %
    % extract SS_res and SS_tot from the last row of the cumulative columns
    \pgfplotstablegetelem{\lastrow}{sumres}\of#1%
    \pgfmathsetmacro{\SSres}{\pgfplotsretval}%
    \pgfplotstablegetelem{\lastrow}{sumdiff}\of#1%
    \pgfmathsetmacro{\SStot}{\pgfplotsretval}%
    %
    % calculate R^2
    \pgfmathsetmacro{#7}{1-\SSres/\SStot}%
}

% Run the regression once per data set of the quartet. Each call fits
% y<n> over x<n>, adds the fitted column reg<n> to the table, and stores
% slope, intercept and R^2 in the three macros given as the last arguments
% (suffix A-D corresponds to data sets 1-4).
\MakeRegression{x1}{y1}{reg1}{\SlA}{\IntA}{\RsqA}
\MakeRegression{x2}{y2}{reg2}{\SlB}{\IntB}{\RsqB}
\MakeRegression{x3}{y3}{reg3}{\SlC}{\IntC}{\RsqC}
\MakeRegression{x4}{y4}{reg4}{\SlD}{\IntD}{\RsqD}

% Legend text for one fitted line: "<slope> . x <signed intercept>, R^2 = <value>".
% #1: slope, #2: intercept (printed with an explicit sign), #3: R squared.
% All three numbers are formatted by \pgfmathprintnumber.
\newcommand\LegendEntry[3]{%
  $\pgfmathprintnumber{#1} \cdot x \pgfmathprintnumber[print sign]{#2}$,
  $R^2 = \pgfmathprintnumber{#3}$%
}

\begin{document}
\begin{tikzpicture}
% 2x2 grid, one panel per data set of the quartet. In every panel the first
% plot is the fitted line (column reg<n>) and the second the raw data as
% markers; the legend entries follow in the same order.
\begin{groupplot}[
  group style={
    group size=2 by 2,
  }
]

% Panel 1: data set (x1, y1)
\nextgroupplot[legend pos=north west]
\addplot +[mark=none] table [x=x1, y=reg1] {\tableLabel};
\addplot +[only marks] table [x=x1, y=y1]  {\tableLabel};

\addlegendentry{\LegendEntry{\SlA}{\IntA}{\RsqA}}
\addlegendentry{y1}

% Panel 2: data set (x2, y2)
\nextgroupplot[legend pos=south east]
\addplot +[mark=none] table [x=x2, y=reg2] {\tableLabel};
\addplot +[only marks] table [x=x2, y=y2]  {\tableLabel};

% Uses \SlB, the slope of data set 2; this previously read \SlC, which is
% the slope of data set 3.
\addlegendentry{\LegendEntry{\SlB}{\IntB}{\RsqB}}
\addlegendentry{y2}

% Panel 3: data set (x3, y3)
\nextgroupplot[legend pos=north west]
\addplot +[mark=none] table [x=x3, y=reg3] {\tableLabel};
\addplot +[only marks] table [x=x3, y=y3]  {\tableLabel};

\addlegendentry{\LegendEntry{\SlC}{\IntC}{\RsqC}}
\addlegendentry{y3}

% Panel 4: data set (x4, y4)
\nextgroupplot[legend pos=south east]
\addplot +[mark=none] table [x=x4, y=reg4] {\tableLabel};
\addplot +[only marks] table [x=x4, y=y4]  {\tableLabel};

\addlegendentry{\LegendEntry{\SlD}{\IntD}{\RsqD}}
\addlegendentry{y4}


\end{groupplot}
\end{tikzpicture}
\end{document}

相关内容