答案1
编辑:代码现在托管在Github。
感谢 Ulrike 指出 CTAN 上存在 xml 接口,我得以借助 wget 命令和我的 luaxml 包,写出一个简单粗糙的 lua 脚本。
思路是:把包名称作为参数传给这个脚本,脚本会把对应的 biblatex 条目打印到标准输出。首先是完整的脚本 ctanbib.lua:
#!/usr/bin/env texlua
-- ctanbib.lua -- export ctan entries to bib format
--
-- Initialise kpathsea under the "luatex" program name before anything else
-- runs (the require() calls further down presumably rely on it to locate
-- modules in the TeX tree).
kpse.set_program_name("luatex")

-- Print the usage text when no package name is given or help is requested.
-- Explicitly asking for help is a successful run (status 0); a missing
-- argument remains a usage error (status 1).
local help_requested = arg[1] == "--help" or arg[1] == "-h"
if #arg < 1 or help_requested then
  print [[ctanbib - convert ctan package information to bibtex format
Usage:
texlua ctanbib <package name>
This command will print bibtex entry to the terminal output
]]
  os.exit(help_requested and 0 or 1)
end
-- Package name requested on the command line (first script argument).
local pkgname = arg[1]
-- Address of the CTAN XML record for that package.
local url = "https://www.ctan.org/xml/pkg/" .. pkgname
-- change that for different title scheme
-- (%s is the slot string.format fills with the capitalised package name)
local titleformat = "The %s package"
-- Template of the emitted entry: every $name placeholder is replaced by the
-- field of the same name when the template is passed through compile().
local bibtexformat = [[
@manual{$package,
title = {$title},
subtitle = {$subtitle},
author = {$author},
url = {$url},
urldate = {$urldate},
date = {$date},
version = {$version}
}
]]
local xml = require('luaxml-mod-xml')
local handler = require('luaxml-mod-handler')
--- Download an XML document with wget and parse it into a LuaXML tree.
-- @param url address of the document to fetch
-- @return root table built by the tree handler, or false when the URL is
--   rejected, wget cannot be started, or nothing was downloaded
local load_xml = function(url)
  -- The URL is pasted into a shell command line, so refuse anything that is
  -- not a plain URL character; otherwise a crafted package name such as
  -- "x; some-command" would be executed by the shell.
  if type(url) ~= "string" or url == "" or url:find("[^%w%-%._~:/%+]") then
    return false
  end
  local command = io.popen("wget -qO- " .. url, "r")
  if not command then
    return false
  end
  local info = command:read("*all")
  command:close()
  if not info or string.len(info) == 0 then
    return false
  end
  -- Parser state is kept local so it no longer leaks into the global table.
  local treehandler = handler.simpleTreeHandler()
  -- NOTE(review): presumably tells LuaXML not to collapse a lone authorref
  -- element into a single table -- confirm against the LuaXML documentation.
  treehandler.options.noReduce = {authorref=true}
  local parser = xml.xmlParser(treehandler)
  parser:parse(info)
  return treehandler.root
end
--- Build the BibTeX author field from the authorref elements of an entry.
-- @param a authorref value of the parsed entry: a list of element tables, a
--   single element table, or nil when the entry has no authorref at all
-- @return string of "Family, Given" names joined with " and "; "" when there
--   is no usable name
local get_authors = function(a)
  if a == nil then return "" end
  local retrieved_authors = {}
  -- fix LuaXML "feature": a lone element may arrive as the table itself
  if #a == 0 then a = {a} end
  for _, author in ipairs(a) do
    local attr = author._attr or {}
    local current = {}
    -- assigning nil appends nothing, so a missing name part is just left out
    current[#current+1] = attr.familyname
    current[#current+1] = attr.givenname
    -- skip authors without any name instead of emitting an empty entry
    if #current > 0 then
      retrieved_authors[#retrieved_authors+1] = table.concat(current, ", ")
    end
  end
  return table.concat(retrieved_authors, " and ")
end
--- Turn a package name into the title of the entry.
-- @param title package name taken from the parsed entry
-- @return titleformat filled in with the name, its first letter upper-cased
local get_title = function(title)
  -- Use the UTF-8 aware gsub: with string.gsub the "." matches one byte, so
  -- a multi-byte first letter would be split and never be upper-cased.
  local capitalized = unicode.utf8.gsub(title, "^(.)", function(first)
    return unicode.utf8.upper(first)
  end)
  return string.format(titleformat, capitalized)
end
--- Pick the URL to cite for the package.
-- @param home parsed <home> element, or nil when the entry has none
-- @return the href attribute of the element when present, otherwise the
--   package page address built from pkgname
local get_url = function(home)
  local attributes = (home or {})["_attr"]
  if attributes and attributes.href then
    return attributes.href
  end
  return "http://www.ctan.org/pkg/" .. pkgname
end
--- Read version number and release date from the <version> element.
-- @param version parsed <version> element, or nil when the entry has none
-- @return two values: the "number" attribute and the "date" attribute; each
--   is nil when it is not available
local get_version = function(version)
  local attributes = version and version["_attr"]
  if not attributes then
    return nil, nil
  end
  return attributes["number"], attributes["date"]
end
--- Escape the characters this script treats as special in a field value.
-- @param a field text; nil is treated as the empty string
-- @return the text with every "$", "{" and "}" preceded by a backslash
local bibtex_escape = function(a)
  local text = a or ""
  -- The outer parentheses drop gsub's second result (the match count), so
  -- callers always receive exactly one value.
  return (text:gsub("([%$%{%}])", "\\%1"))
end
--- Fill a template with field values.
-- @param template text containing $name placeholders (lower-case a-z names)
-- @param records table mapping placeholder names to field values
-- @return the template with each placeholder replaced by its escaped value;
--   a placeholder without a value becomes empty
local compile = function(template, records)
  -- "%$" makes it explicit that the dollar sign is matched literally.
  local filled = template:gsub("%$([a-z]+)", function(key)
    -- parentheses keep only the escaped text; bibtex_escape already maps a
    -- missing value to "", so no further fallback is needed
    return (bibtex_escape(records[key]))
  end)
  -- return the text only, not gsub's substitution count
  return filled
end
-- Abort with a diagnostic. The message goes to stderr because stdout carries
-- the generated entry and is typically redirected into a .bib file.
local fail = function(message)
  io.stderr:write(message .. "\n")
  os.exit(1)
end
local entry = load_xml(url)
if not entry then
  fail("Cannot find entry for package "..pkgname)
end
-- root element is also saved, so we use this trick
local record = entry.entry
if not record then
  -- something was downloaded, but it has no <entry> root element
  fail("Cannot find entry for package "..pkgname)
end
-- Collect every field referenced by the template.
local e = {}
e.author = get_authors(record.authorref)
e.package = pkgname
e.title = get_title(record.name)
e.subtitle = record.caption
e.url = get_url(record.home)
e.version, e.date = get_version(record.version)
-- date on which the record was retrieved
e.urldate = os.date("%Y-%m-%d")
local result = compile(bibtexformat, e)
print(result)
为了理解这个脚本,我们需要查看 ctan 的 xml 文件:
<entry id="pgf">
<name>pgf</name>
<caption>Create PostScript and PDF graphics in TeX</caption>
<authorref id="auth:tantau"/>
<authorref id="auth:feuersaenger"/>
<copyright owner="Till Tantau" year="2005-2014"/>
<license type="lppl1.3"/>
<version number="3.0.0" date="2013-12-20"/>
<description>PGF is a macro package for creating graphics.
It is platform- and format-independent and works together
with the most important TeX backend drivers, including pdftex and
dvips. It comes with a user-friendly syntax layer called TikZ.
<p/>
Its usage is similar to
<ref refid="pstricks">pstricks</ref> and the standard picture
environment. PGF works with plain (pdf-)TeX, (pdf-)LaTeX, and
ConTeXt. Unlike <ref refid="pstricks">pstricks</ref>, it
can produce either PostScript or PDF output.</description>
<documentation details="Readme" href="ctan:/graphics/pgf/base/README"/>
<documentation details="PGF Manual" href="ctan:/graphics/pgf/base/doc/pgfmanual.pdf"/>
<documentation details="Minimal introduction to TikZ" href="http://cremeronline.com/LaTeX/minimaltikz.pdf"/>
<home href="http://sourceforge.net/projects/pgf/"/>
<ctan path="/graphics/pgf/base" file="true"/>
<install path="/graphics/pgf/base/pgf.tds.zip"/>
<miktex location="pgf"/>
<texlive location="pgf"/>
<keyval key="topic" value="graphics-in-tex"/>
<keyval key="index" value=""/>
</entry>
请注意,作者保存在 authorref 元素中,该元素不包含姓名,只包含对其他文件的引用,作者信息保存在那些文件里。还请注意,这里没有现成的标题(title),只有包名称(name)和简短说明(caption),后者可以用作副标题。
包含作者信息的 xml 文件如下所示:
<author key="tantau" givenname="Till" familyname="Tantau"/>
因此,对于每位作者,我们还需要获取这个带有名称的文件。
现在我们可以看看这个脚本的某些部分:
-- Title pattern; %s is the slot that string.format fills with the package name.
local titleformat = "The %s package"
我们需要标题的格式,因为我们只得到包名称。
-- Template of the emitted entry; each $name is a placeholder for one field.
local bibtexformat = [[
@manual{$package,
title = {$title},
subtitle = {$subtitle},
author = {$author},
url = {$url},
urldate = {$urldate},
date = {$date},
version = {$version}
}
]]
这是用于打印 bibtex 条目的模板。其中形如 $string 的占位符会被替换为相应的已解析字段。
--- Download an XML document with wget and parse it into a LuaXML tree.
-- @param url address of the document to fetch
-- @return root table built by the tree handler, or false when the URL is
--   rejected, wget cannot be started, or nothing was downloaded
local load_xml = function(url)
  -- The URL is pasted into a shell command line, so refuse anything that is
  -- not a plain URL character; otherwise a crafted package name such as
  -- "x; some-command" would be executed by the shell.
  if type(url) ~= "string" or url == "" or url:find("[^%w%-%._~:/%+]") then
    return false
  end
  local command = io.popen("wget -qO- " .. url, "r")
  if not command then
    return false
  end
  local info = command:read("*all")
  command:close()
  if not info or string.len(info) == 0 then
    return false
  end
  -- Parser state is kept local so it no longer leaks into the global table.
  local treehandler = handler.simpleTreeHandler()
  local parser = xml.xmlParser(treehandler)
  parser:parse(info)
  return treehandler.root
end
此函数获取 url 并返回解析后的 xml。
--- Build the BibTeX author field by fetching each referenced author record.
-- @param a authorref value of the parsed entry: a list of element tables, a
--   single element table, or nil when the entry has no authorref at all
-- @return string of "Family, Given" names joined with " and "; authors whose
--   record cannot be loaded are left out after a warning
local get_authors = function(a)
  if a == nil then return "" end
  -- Normalise the input: a lone element arrives as the table itself, several
  -- elements arrive as a list.
  local authors
  if #a == 0 then
    authors = {a}
  else
    authors = a
  end
  local retrieved_authors = {}
  for _, v in ipairs(authors) do
    local id = (v["_attr"] or {})["key"]
    -- only fetch when there is a key to build the address from
    local authorxml = id and load_xml("http://www.ctan.org/xml/author/" .. id)
    local x = authorxml and authorxml.author and authorxml.author["_attr"]
    if not x then
      -- warnings go to stderr so they never end up in a redirected .bib file
      io.stderr:write("Warning: cannot load author info for: " .. tostring(id) .. "\n")
    else
      local current = {}
      -- assigning nil appends nothing, so a missing name part is left out
      current[#current+1] = x.familyname
      current[#current+1] = x.givenname
      if #current > 0 then
        retrieved_authors[#retrieved_authors+1] = table.concat(current, ", ")
      end
    end
  end
  return table.concat(retrieved_authors, " and ")
end
get_authors 函数最为复杂,这是受 luaxml 的限制所致:只有一个元素时和存在两个或多个元素时,它使用不同的表结构。表规范化之后,脚本会为每位作者获取并处理对应的 xml 文件,从而构建作者字段。
--- Turn a package name into the title of the entry.
-- @param title package name taken from the parsed entry
-- @return titleformat filled in with the name, its first letter upper-cased
local get_title = function(title)
  -- Use the UTF-8 aware gsub: with string.gsub the "." matches one byte, so
  -- a multi-byte first letter would be split and never be upper-cased.
  local capitalized = unicode.utf8.gsub(title, "^(.)", function(first)
    return unicode.utf8.upper(first)
  end)
  return string.format(titleformat, capitalized)
end
把包名的首字母改为大写(我们使用 unicode 库以支持重音字符),然后用 titleformat 构建标题。
--- Pick the URL to cite for the package.
-- @param home parsed <home> element, or nil when the entry has none
-- @return the href attribute of the element when present, otherwise the
--   package page address built from pkgname
local get_url = function(home)
  local attributes = (home or {})["_attr"]
  if attributes and attributes.href then
    return attributes.href
  end
  return "http://www.ctan.org/pkg/" .. pkgname
end
有些包设置了 homepage;对于没有设置的包,我们使用它在 ctan 上的路径。
--- Read version number and release date from the <version> element.
-- @param version parsed <version> element, or nil when the entry has none
-- @return two values: the "number" attribute and the "date" attribute; each
--   is nil when it is not available
local get_version = function(version)
  local attributes = version and version["_attr"]
  if not attributes then
    return nil, nil
  end
  return attributes["number"], attributes["date"]
end
解析版本号和发布日期,并非所有软件包都有这些!
--- Escape the characters this script treats as special in a field value.
-- @param a field text; nil is treated as the empty string
-- @return the text with every "$", "{" and "}" preceded by a backslash
local bibtex_escape = function(a)
  local text = a or ""
  -- The outer parentheses drop gsub's second result (the match count), so
  -- callers always receive exactly one value.
  return (text:gsub("([%$%{%}])", "\\%1"))
end
转义特殊字符。这是非常基本的版本。我假设使用 biblatex 和 biber,因此采用 utf8 编码。如果想把重音字符编码为 bibtex 代码(如 {\v{c}}),脚本会变得复杂得多。
--- Fill a template with field values.
-- @param template text containing $name placeholders (lower-case a-z names)
-- @param records table mapping placeholder names to field values
-- @return the template with each placeholder replaced by its escaped value;
--   a placeholder without a value becomes empty
local compile = function(template, records)
  -- "%$" makes it explicit that the dollar sign is matched literally.
  local filled = template:gsub("%$([a-z]+)", function(key)
    -- parentheses keep only the escaped text; bibtex_escape already maps a
    -- missing value to "", so no further fallback is needed
    return (bibtex_escape(records[key]))
  end)
  -- return the text only, not gsub's substitution count
  return filled
end
把值填入模板的函数。所有字段都保存在 records 表中。
-- Gather all template fields in one table, then render and print the entry.
local e = {
  author = get_authors(record.authorref),
  package = pkgname,
  title = get_title(record.name),
  subtitle = record.caption,
  url = get_url(record.home),
}
-- get_version yields two values: version number and release date
e.version, e.date = get_version(record.version)
-- date of retrieval, formatted as YYYY-MM-DD
e.urldate = os.date("%Y-%m-%d")
local result = compile(bibtexformat, e)
print(result)
这样就得到一个包含所有字段的表,并打印出 bibtex 条目。
现在我们可以尝试代码:
texlua ctanbib.lua tex4ht
将打印:
@manual{tex4ht,
title = {The Tex4ht package},
subtitle = {Convert (La)TeX to HTML/XML},
author = {Gurari, Eitan M. and Berry, Karl and Radhakrishnan, C. V.},
url = {http://tug.org/tex4ht},
urldate = {2014-09-12},
date = {2008-06-16},
version = {}
}
我们可以用标准的重定向方式把输出写入文件:
texlua ctanbib.lua tex4ht > sample.bib
再追加几个包的条目:
texlua ctanbib.lua luaxml >> sample.bib
texlua ctanbib.lua pgf >> sample.bib
我们可以用一些示例文件来测试结果:
% Minimal test document that typesets the entries collected in sample.bib.
\documentclass{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{biblatex}
% bibliography database written by the ctanbib.lua runs
\addbibresource{sample.bib}
\begin{document}
% include every entry of the database without citing each one explicitly
\nocite{*}
\printbibliography
\end{document}
结果: