将书目按作者（字母顺序）分组

Question

如果可以借助外部工具对 bib 文件进行预处理，那么这是可以实现的。给定一个 bib 文件，可以使用 Python 提取作者姓名并据此对条目进行排序，然后利用 biblatex 的类别（category）功能分组输出。

主要组件的源代码如下所示。完整源代码请参见https://github.com/xziyue/latex-auto-categorized-bib。

解释

Python 脚本分析参考书目文件并生成供 LaTeX 读取的输出文件。输出文件将如下所示：

...
\BibAuthorInfo{google-llc}{android_formats}
\BibAuthorInfo{greenwald-j}{greenwald_2019}
\BibAuthorInfo{grinstein-eric}{grinstein2018audio}
\BibAuthorInfo{grobman-s}{grobman_2019}
\BibAuthorInfo{gu-quanquan}{gu2011linear}
\BibAuthorInfo{guan-haiying}{guan2019mfc}
...
\BibAllAuthors{adobe-inc,almutairi-zaynab,altinisik-enes,apple-inc,avidemux-contributors,ba-lei-jimmy,bansal-vipin,bartusiak-r-emily,bayram-sevinc,bestagini-paolo,bhagtani-kratika,bharati-a,bianchi-tiziano,...}

它包括与每个作者相关的书目项目，以及根据姓名排序的作者列表。

在 LaTeX 端，如果启用了 --shell-escape，则编译时可以自动调用 Python 脚本；否则，用户也可以手动运行 Python 脚本（适用于 bib 文件不经常更改的情况）。示例中会自动运行 Python 脚本并读回其输出：
```
\immediate\write18{python3 bib_categorizer.py \jobname-bibinfo.tex example.bib}
% load the generated file
\input{\jobname-bibinfo.tex}
```
\BibAuthorInfo 会为每个作者姓名创建一个新类别。
用户可以使用 \PrintBibBetween 命令打印排序列表中两个作者之间（含两端）的书目条目。示例中使用的是 \PrintBibBetween{ito-keith}{kabir-mohsin-muhammad}。

LaTeX 源代码（test.tex）

\documentclass{article}
\usepackage[citestyle=authoryear,bibstyle=numeric]{biblatex}
\addbibresource{example.bib}


\ExplSyntaxOn

% \BibAuthorInfo{<author id>}{<entry keys>}
% Declares a biblatex category named <author id> and adds the comma-separated
% <entry keys> to it. The generated bibinfo file calls this once per author.
% Defined as protected because it performs declarations and is not expandable,
% so it must not be expanded inside an x-type or \edef context.
\cs_new_protected:Npn \BibAuthorInfo #1#2
{
    \DeclareBibliographyCategory{#1}
    \addtocategory{#1}{#2}
}

% Global list of all author ids, in the order given to \BibAllAuthors.
\clist_new:N \g_bibinfo_all_authors_clist
% Local scratch copy of that list; \PrintBibBetween pops items from it.
\clist_new:N \l_bibinfo_tmp_clist
% \BibAllAuthors{<comma-separated author ids>}
% Stores the list of author ids globally. Defined as protected because it
% performs an assignment and therefore is not expandable.
\cs_new_protected:Npn \BibAllAuthors #1
{
    \clist_gset:Nn \g_bibinfo_all_authors_clist {#1}
}


% State used by \PrintBibBetween.
\bool_new:N \l_bibinfo_start_found_bool  % true once the start author id has been seen
\bool_new:N \l_bibinfo_end_found_bool    % true once the end author id has been seen
\bool_new:N \l_bibinfo_loop_end_bool     % loop exit condition, recomputed every iteration
\tl_new:N \l_bibinfo_tmpa_tl             % author id popped in the current iteration
\tl_new:N \l_bibinfo_tmpb_tl             % declared but not used by the code in this listing
\tl_new:N \l_bibinfo_tmpc_tl             % pre-expanded \printbibliography call
% \PrintBibBetween{<start author id>}{<end author id>}
% Walks the author list in order and prints one bibliography (heading=none)
% per author category, from <start author id> up to and including
% <end author id>. Raises an error for each of the two ids that was not
% encountered during the walk.
% Defined as protected because it performs assignments and typesets material,
% so it must not be expanded inside an x-type or \edef context.
\cs_new_protected:Npn \PrintBibBetween #1#2
{
    % work on a local copy so the global author list stays intact
    \clist_set_eq:NN \l_bibinfo_tmp_clist \g_bibinfo_all_authors_clist
    \bool_set_false:N \l_bibinfo_start_found_bool
    \bool_set_false:N \l_bibinfo_end_found_bool
    % an empty author list skips the loop entirely
    \bool_set:Nn \l_bibinfo_loop_end_bool {\clist_if_empty_p:N \l_bibinfo_tmp_clist}

    \bool_until_do:nn {\l_bibinfo_loop_end_bool}
    {
        % take the next author id off the front of the list
        \clist_pop:NN \l_bibinfo_tmp_clist \l_bibinfo_tmpa_tl
        \exp_args:NV \str_if_eq:nnT  \l_bibinfo_tmpa_tl {#1}
        {
            \bool_set_true:N \l_bibinfo_start_found_bool
        }
        \exp_args:NV \str_if_eq:nnT \l_bibinfo_tmpa_tl {#2}
        {
            \bool_set_true:N \l_bibinfo_end_found_bool
        }
        % both flags are updated before printing, so the start and the end
        % author are themselves included in the output
        \bool_if:nT {\l_bibinfo_start_found_bool}
        {
            % expand the author id first so that \printbibliography receives
            % the category name itself rather than the token list variable
            \tl_set:Nx \l_bibinfo_tmpc_tl {\exp_not:N\printbibliography[category={\l_bibinfo_tmpa_tl},heading=none]}
            \tl_use:N \l_bibinfo_tmpc_tl
        }
        % stop when the list is exhausted or the end author has been handled
        \bool_set:Nn \l_bibinfo_loop_end_bool {\clist_if_empty_p:N \l_bibinfo_tmp_clist || \l_bibinfo_end_found_bool}
    }

    \bool_if:nF {\l_bibinfo_start_found_bool}
    {
        \GenericError{}{Cannot found bib start item "#1"}{}{}
    }

    \bool_if:nF {\l_bibinfo_end_found_bool}
    {
        \GenericError{}{Cannot found bib end item "#2"}{}{}
    }
}


\ExplSyntaxOff


% Requires --shell-escape if the script should run automatically on every LaTeX compile.
% Run the Python script on the bibliography file(s): the first argument is the
% output file and every further argument is an input .bib file, so additional
% .bib files are appended to the end of the argument list.
\immediate\write18{python3 bib_categorizer.py \jobname-bibinfo.tex example.bib}
% Load the generated file (the \BibAuthorInfo / \BibAllAuthors calls).
\input{\jobname-bibinfo.tex}

\begin{document}

Is is working?

\nocite{*}

\PrintBibBetween{ito-keith}{kabir-mohsin-muhammad}


\end{document}

Python 脚本（bib_categorizer.py）

Python 脚本需要bibtexparser包。

import bibtexparser
import sys
import os
import re
import string

# Usage: bib_categorizer.py OUTPUT_TEX INPUT_BIB [INPUT_BIB ...]
# An explicit check is used instead of `assert`, because assertions are
# stripped when Python runs with -O and the script would then fail later
# with an unrelated IndexError.
if len(sys.argv) < 3:
    sys.exit('invalid number of arguments')
output_filename = sys.argv[1]   # file that receives the generated LaTeX macros
input_filenames = sys.argv[2:]  # one or more .bib files to analyse


# only allow certain characters in names
def clean_name_segments(s:str)->str:
    """Return *s* with every character other than ASCII letters and spaces removed."""
    allowed = string.ascii_letters + ' '
    return ''.join(ch for ch in s if ch in allowed)

# return a list of authors
# each author name is represented using a list, where name segments are ordered from first name to last name
def process_author_names(s:str)->list:
    """Parse a BibTeX author field into a list of authors.

    Each author is a list of cleaned name segments ordered from first name
    to last name. Two input shapes are handled:

    * A value that starts with a braced group, e.g. ``{Google LLC}``, is
      returned as a single author with one segment.
    * Any other value is split on ``' and '``. A part containing a comma is
      read as ``Last, First Middle``; a part without one is read as
      ``First Middle Last`` and split on spaces.

    Segments that are empty after cleaning are dropped, so an author's
    segment list can be empty.
    """
    s = s.strip()

    # Raw string: '\{' and '\}' are invalid escape sequences in a normal
    # string literal and trigger a SyntaxWarning on recent Python versions.
    # NOTE(review): the match is anchored at the start only and '.*' is
    # greedy, so a field with several braced names is captured from the first
    # '{' to the last '}' as one author. Confirm whether that is intended.
    braces_match = re.match(r'\{(.*)\}', s)
    if braces_match:
        return [[clean_name_segments(braces_match.group(1))]]

    authors = []
    for part in s.split(' and '):
        if ',' in part:
            # "Last, First Middle": the last name goes to the end of the list
            last, _, first = part.partition(',')
            segments = first.split(' ') + [last]
        else:
            segments = part.split(' ')

        cleaned = (clean_name_segments(seg.strip()) for seg in segments)
        authors.append([seg for seg in cleaned if seg])

    return authors
    

def get_author_sort_key(e:list):
    """Return the sort key for one author: lower-cased name segments in reverse order."""
    return tuple(segment.lower() for segment in reversed(e))

# Maps each first-author sort key (a tuple of lower-cased name segments) to
# the list of entry IDs whose first author produced that key.
author_lut = dict()

for fn in input_filenames:
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if not os.path.exists(fn):
        sys.exit(f'input file "{fn}" does not exist')

    with open(fn) as f:
        bib = bibtexparser.load(f)

    for entry in bib.entries:
        entry_name = entry['ID']

        if 'author' not in entry:
            print(f'Entry {entry_name} does not have author field, it is skipped')
            continue

        # Line breaks inside the author field are flattened to spaces first.
        authors = process_author_names(entry['author'].replace('\r', ' ').replace('\n', ' '))

        # Entries are grouped by their first author only.
        first_author_key = get_author_sort_key(authors[0])
        if not any(first_author_key):
            # Nothing survived name cleaning (for instance an empty field or a
            # name without ASCII letters). Such an entry would end up in a
            # category with an empty id, so it is reported and left out.
            print(f'Entry {entry_name} has no usable first author name, it is skipped')
            continue

        author_lut.setdefault(first_author_key, []).append(entry_name)

# Tuples compare element by element, so authors are ordered by the segments
# of their sort key from left to right.
author_sorted = sorted(author_lut)

output_lines = []
author_name_ids = []
for author_name_seg in author_sorted:
    author_bib_items = author_lut[author_name_seg]
    # Category id: key segments joined with '-', spaces inside a segment
    # are replaced by '-' as well.
    author_name_id = (' '.join(author_name_seg)).replace(' ', '-')
    author_name_ids.append(author_name_id)
    output_lines.append(r'\BibAuthorInfo{%s}{%s}' % (author_name_id, ','.join(author_bib_items)))

# Raw string: '\B' is not a valid escape sequence in a normal string literal.
output_lines.append(r'\BibAllAuthors{%s}' % ','.join(author_name_ids))

with open(output_filename, 'w') as f:
    f.write('\n'.join(output_lines))

改进了作者排序

def get_author_sort_key(e:list):
    """Return the sort key for one author as a tuple of lower-cased segments.

    *e* holds the name segments ordered from first name to last name. Names
    with at most two segments are simply reversed. Longer names are ordered
    as (last, first, middle...), so that the first name is compared before
    any middle names.
    """
    ret = [x.lower() for x in e]
    if len(ret) <= 2:
        return tuple(reversed(ret))

    # The original expression had an unbalanced '[' here (a SyntaxError).
    # The key must stay a flat tuple of strings to be comparable with the
    # keys produced by the short-name branch above.
    last, first, middle = ret[-1], ret[0], ret[1:-1]
    return (last, first, *middle)

Answer 1

如果可以借助外部工具对 bib 文件进行预处理，那么这是可以实现的。给定一个 bib 文件，可以使用 Python 提取作者姓名并据此对条目进行排序，然后利用 biblatex 的类别（category）功能分组输出。

主要组件的源代码如下所示。完整源代码请参见https://github.com/xziyue/latex-auto-categorized-bib。

解释

Python 脚本分析参考书目文件并生成供 LaTeX 读取的输出文件。输出文件将如下所示：

...
\BibAuthorInfo{google-llc}{android_formats}
\BibAuthorInfo{greenwald-j}{greenwald_2019}
\BibAuthorInfo{grinstein-eric}{grinstein2018audio}
\BibAuthorInfo{grobman-s}{grobman_2019}
\BibAuthorInfo{gu-quanquan}{gu2011linear}
\BibAuthorInfo{guan-haiying}{guan2019mfc}
...
\BibAllAuthors{adobe-inc,almutairi-zaynab,altinisik-enes,apple-inc,avidemux-contributors,ba-lei-jimmy,bansal-vipin,bartusiak-r-emily,bayram-sevinc,bestagini-paolo,bhagtani-kratika,bharati-a,bianchi-tiziano,...}

它包括与每个作者相关的书目项目，以及根据姓名排序的作者列表。

在 LaTeX 端，如果启用了 --shell-escape，则编译时可以自动调用 Python 脚本；否则，用户也可以手动运行 Python 脚本（适用于 bib 文件不经常更改的情况）。示例中会自动运行 Python 脚本并读回其输出：
```
\immediate\write18{python3 bib_categorizer.py \jobname-bibinfo.tex example.bib}
% load the generated file
\input{\jobname-bibinfo.tex}
```
\BibAuthorInfo 会为每个作者姓名创建一个新类别。
用户可以使用 \PrintBibBetween 命令打印排序列表中两个作者之间（含两端）的书目条目。示例中使用的是 \PrintBibBetween{ito-keith}{kabir-mohsin-muhammad}。

LaTeX 源代码（test.tex）

\documentclass{article}
\usepackage[citestyle=authoryear,bibstyle=numeric]{biblatex}
\addbibresource{example.bib}


\ExplSyntaxOn

% \BibAuthorInfo{<author id>}{<entry keys>}
% Declares a biblatex category named <author id> and adds the comma-separated
% <entry keys> to it. The generated bibinfo file calls this once per author.
% Defined as protected because it performs declarations and is not expandable,
% so it must not be expanded inside an x-type or \edef context.
\cs_new_protected:Npn \BibAuthorInfo #1#2
{
    \DeclareBibliographyCategory{#1}
    \addtocategory{#1}{#2}
}

% Global list of all author ids, in the order given to \BibAllAuthors.
\clist_new:N \g_bibinfo_all_authors_clist
% Local scratch copy of that list; \PrintBibBetween pops items from it.
\clist_new:N \l_bibinfo_tmp_clist
% \BibAllAuthors{<comma-separated author ids>}
% Stores the list of author ids globally. Defined as protected because it
% performs an assignment and therefore is not expandable.
\cs_new_protected:Npn \BibAllAuthors #1
{
    \clist_gset:Nn \g_bibinfo_all_authors_clist {#1}
}


% State used by \PrintBibBetween.
\bool_new:N \l_bibinfo_start_found_bool  % true once the start author id has been seen
\bool_new:N \l_bibinfo_end_found_bool    % true once the end author id has been seen
\bool_new:N \l_bibinfo_loop_end_bool     % loop exit condition, recomputed every iteration
\tl_new:N \l_bibinfo_tmpa_tl             % author id popped in the current iteration
\tl_new:N \l_bibinfo_tmpb_tl             % declared but not used by the code in this listing
\tl_new:N \l_bibinfo_tmpc_tl             % pre-expanded \printbibliography call
% \PrintBibBetween{<start author id>}{<end author id>}
% Walks the author list in order and prints one bibliography (heading=none)
% per author category, from <start author id> up to and including
% <end author id>. Raises an error for each of the two ids that was not
% encountered during the walk.
% Defined as protected because it performs assignments and typesets material,
% so it must not be expanded inside an x-type or \edef context.
\cs_new_protected:Npn \PrintBibBetween #1#2
{
    % work on a local copy so the global author list stays intact
    \clist_set_eq:NN \l_bibinfo_tmp_clist \g_bibinfo_all_authors_clist
    \bool_set_false:N \l_bibinfo_start_found_bool
    \bool_set_false:N \l_bibinfo_end_found_bool
    % an empty author list skips the loop entirely
    \bool_set:Nn \l_bibinfo_loop_end_bool {\clist_if_empty_p:N \l_bibinfo_tmp_clist}

    \bool_until_do:nn {\l_bibinfo_loop_end_bool}
    {
        % take the next author id off the front of the list
        \clist_pop:NN \l_bibinfo_tmp_clist \l_bibinfo_tmpa_tl
        \exp_args:NV \str_if_eq:nnT  \l_bibinfo_tmpa_tl {#1}
        {
            \bool_set_true:N \l_bibinfo_start_found_bool
        }
        \exp_args:NV \str_if_eq:nnT \l_bibinfo_tmpa_tl {#2}
        {
            \bool_set_true:N \l_bibinfo_end_found_bool
        }
        % both flags are updated before printing, so the start and the end
        % author are themselves included in the output
        \bool_if:nT {\l_bibinfo_start_found_bool}
        {
            % expand the author id first so that \printbibliography receives
            % the category name itself rather than the token list variable
            \tl_set:Nx \l_bibinfo_tmpc_tl {\exp_not:N\printbibliography[category={\l_bibinfo_tmpa_tl},heading=none]}
            \tl_use:N \l_bibinfo_tmpc_tl
        }
        % stop when the list is exhausted or the end author has been handled
        \bool_set:Nn \l_bibinfo_loop_end_bool {\clist_if_empty_p:N \l_bibinfo_tmp_clist || \l_bibinfo_end_found_bool}
    }

    \bool_if:nF {\l_bibinfo_start_found_bool}
    {
        \GenericError{}{Cannot found bib start item "#1"}{}{}
    }

    \bool_if:nF {\l_bibinfo_end_found_bool}
    {
        \GenericError{}{Cannot found bib end item "#2"}{}{}
    }
}


\ExplSyntaxOff


% Requires --shell-escape if the script should run automatically on every LaTeX compile.
% Run the Python script on the bibliography file(s): the first argument is the
% output file and every further argument is an input .bib file, so additional
% .bib files are appended to the end of the argument list.
\immediate\write18{python3 bib_categorizer.py \jobname-bibinfo.tex example.bib}
% Load the generated file (the \BibAuthorInfo / \BibAllAuthors calls).
\input{\jobname-bibinfo.tex}

\begin{document}

Is is working?

\nocite{*}

\PrintBibBetween{ito-keith}{kabir-mohsin-muhammad}


\end{document}

Python 脚本（bib_categorizer.py）

Python 脚本需要bibtexparser包。

import bibtexparser
import sys
import os
import re
import string

# Usage: bib_categorizer.py OUTPUT_TEX INPUT_BIB [INPUT_BIB ...]
# An explicit check is used instead of `assert`, because assertions are
# stripped when Python runs with -O and the script would then fail later
# with an unrelated IndexError.
if len(sys.argv) < 3:
    sys.exit('invalid number of arguments')
output_filename = sys.argv[1]   # file that receives the generated LaTeX macros
input_filenames = sys.argv[2:]  # one or more .bib files to analyse


# only allow certain characters in names
def clean_name_segments(s:str)->str:
    """Return *s* with every character other than ASCII letters and spaces removed."""
    allowed = string.ascii_letters + ' '
    return ''.join(ch for ch in s if ch in allowed)

# return a list of authors
# each author name is represented using a list, where name segments are ordered from first name to last name
def process_author_names(s:str)->list:
    """Parse a BibTeX author field into a list of authors.

    Each author is a list of cleaned name segments ordered from first name
    to last name. Two input shapes are handled:

    * A value that starts with a braced group, e.g. ``{Google LLC}``, is
      returned as a single author with one segment.
    * Any other value is split on ``' and '``. A part containing a comma is
      read as ``Last, First Middle``; a part without one is read as
      ``First Middle Last`` and split on spaces.

    Segments that are empty after cleaning are dropped, so an author's
    segment list can be empty.
    """
    s = s.strip()

    # Raw string: '\{' and '\}' are invalid escape sequences in a normal
    # string literal and trigger a SyntaxWarning on recent Python versions.
    # NOTE(review): the match is anchored at the start only and '.*' is
    # greedy, so a field with several braced names is captured from the first
    # '{' to the last '}' as one author. Confirm whether that is intended.
    braces_match = re.match(r'\{(.*)\}', s)
    if braces_match:
        return [[clean_name_segments(braces_match.group(1))]]

    authors = []
    for part in s.split(' and '):
        if ',' in part:
            # "Last, First Middle": the last name goes to the end of the list
            last, _, first = part.partition(',')
            segments = first.split(' ') + [last]
        else:
            segments = part.split(' ')

        cleaned = (clean_name_segments(seg.strip()) for seg in segments)
        authors.append([seg for seg in cleaned if seg])

    return authors
    

def get_author_sort_key(e:list):
    """Return the sort key for one author: lower-cased name segments in reverse order."""
    return tuple(segment.lower() for segment in reversed(e))

# Maps each first-author sort key (a tuple of lower-cased name segments) to
# the list of entry IDs whose first author produced that key.
author_lut = dict()

for fn in input_filenames:
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if not os.path.exists(fn):
        sys.exit(f'input file "{fn}" does not exist')

    with open(fn) as f:
        bib = bibtexparser.load(f)

    for entry in bib.entries:
        entry_name = entry['ID']

        if 'author' not in entry:
            print(f'Entry {entry_name} does not have author field, it is skipped')
            continue

        # Line breaks inside the author field are flattened to spaces first.
        authors = process_author_names(entry['author'].replace('\r', ' ').replace('\n', ' '))

        # Entries are grouped by their first author only.
        first_author_key = get_author_sort_key(authors[0])
        if not any(first_author_key):
            # Nothing survived name cleaning (for instance an empty field or a
            # name without ASCII letters). Such an entry would end up in a
            # category with an empty id, so it is reported and left out.
            print(f'Entry {entry_name} has no usable first author name, it is skipped')
            continue

        author_lut.setdefault(first_author_key, []).append(entry_name)

# Tuples compare element by element, so authors are ordered by the segments
# of their sort key from left to right.
author_sorted = sorted(author_lut)

output_lines = []
author_name_ids = []
for author_name_seg in author_sorted:
    author_bib_items = author_lut[author_name_seg]
    # Category id: key segments joined with '-', spaces inside a segment
    # are replaced by '-' as well.
    author_name_id = (' '.join(author_name_seg)).replace(' ', '-')
    author_name_ids.append(author_name_id)
    output_lines.append(r'\BibAuthorInfo{%s}{%s}' % (author_name_id, ','.join(author_bib_items)))

# Raw string: '\B' is not a valid escape sequence in a normal string literal.
output_lines.append(r'\BibAllAuthors{%s}' % ','.join(author_name_ids))

with open(output_filename, 'w') as f:
    f.write('\n'.join(output_lines))

改进了作者排序

def get_author_sort_key(e:list):
    """Return the sort key for one author as a tuple of lower-cased segments.

    *e* holds the name segments ordered from first name to last name. Names
    with at most two segments are simply reversed. Longer names are ordered
    as (last, first, middle...), so that the first name is compared before
    any middle names.
    """
    ret = [x.lower() for x in e]
    if len(ret) <= 2:
        return tuple(reversed(ret))

    # The original expression had an unbalanced '[' here (a SyntaxError).
    # The key must stay a flat tuple of strings to be comparable with the
    # keys produced by the short-name branch above.
    last, first, middle = ret[-1], ret[0], ret[1:-1]
    return (last, first, *middle)

将书目按作者（字母顺序）分组

例子

答案1

解释

LaTeX 源代码（test.tex）

Python 脚本（bib_categorizer.py）

改进了作者排序

相关内容