我正在尝试使用 lualatex 从 JSON 文件中呈现一份大约 2300 页的文档。
创建一份文档大约需要 32 秒。该文档由一系列表格和段落组成。我认为 32 秒太慢了,我希望能够将准备文档的总时间缩短到 16 秒(页数相同)。
我使用 pdflatex 进行了所有初始实验,发现渲染速度要快得多。但由于必须动态生成文档,而文档的长度可能在 20 到 5000 页之间,因此我决定使用 lualatex。
我将解释我所做的一切以及产生了什么。
以下是用于呈现文档的 JSON(已美化以便于查看)。为了便于提问,我已大幅缩减了 JSON 的大小,但你可以想象 tableOfContents 里面还有更多的章节和小节。
{
"documentName": "AA",
"author": "BB",
"documentId": "MAC-FF/1",
"documentGeneratedAt": "BB",
"documentGeneratedBy": "FF",
"macProtocolID": "CV/1",
"version": "1",
"headerFooter": {
"header": {
"left": "LFT",
"right": "RT"
},
"footer": {
"left": "F/1"
}
},
"tableOfContents": [
{
"section": {
"title": "Actives Limit Calculations",
"text": "This is section text"
},
"subsection": [
{
"id": "Pr1",
"title": "Febuxostat Tablets 40 mg 80 mg and 120 mg (Pr1)",
"content": [
{
"type": "text",
"line": "Worst SAL: 0.725 mg/sqcm"
},
{
"type": "text",
"line": "Next Production: Dabigatran Etexilate Capsules 75 mg 110 mg and 150 mg (Pr2)"
},
{
"type": "text",
"line": "Next PSwab Limit: 1813.458 ppm Swab Area: 25 sqcm Swab Volume: 10 ml"
},
{
"type": "table",
"title": "",
"columns": [
{
"name": "Previous API(A)",
"values": [
"1",
"2",
"3",
"4"
]
},
{
"name": "Next Production",
"values": [
"Pr1",
"Pr2",
"Pr3",
"Pr4"
]
},
{
"name": "ID (B)",
"values": [
"Pr1",
"Pr2",
"Pr3",
"Pr4"
]
}
]
}
]
}
]
}
]
}
为了从上述 JSON 准备文档,我先读取 JSON,然后在 luacode 环境中遍历它来生成文档。下面是我执行此操作的方式。
\documentclass[a4paper,12pt]{report}
\usepackage{luacode}
\begin{luacode}
-- Load the JSON payload once, before any typesetting starts.
local socket = require("socket")
local json = require("json")
-- assert turns a missing or unreadable file into an error carrying the reason
-- reported by io.open, instead of a nil index failure on the read below.
local file = assert(io.open("sample.json"))
local start = socket.gettime()
local text = file:read("*all")
-- Close the handle before parsing so it is released even when json.parse raises.
file:close()
-- tab is intentionally global: the directlua calls later in the document read it.
tab = json.parse(text)
texio.write_nl("Time passed parsing JSON: " .. socket.gettime() - start .. " s\string\n")
\end{luacode}
\begin{luacode}
--- Report whether a value is missing or is the empty string.
-- @param s value to inspect
-- @return true when s is nil or '', false for anything else
local function isempty(s)
  if s == nil then
    return true
  end
  return s == ''
end
--- Queue the LaTeX source of one JSON table item as a tabular.
-- Every column is typeset left-aligned between vertical rules; the number of
-- rows is taken from the values array of the first column.
-- @param item content entry of type "table" with a columns array and an
--   optional title
local function render_table(item)
  local columns = item["columns"]
  local column_count = #columns
  -- Nothing to typeset without columns; this also avoids indexing a missing
  -- first column below.
  if column_count == 0 then
    return
  end
  local row_count = #columns[1]["values"]

  if not isempty(item.title) then
    tex.print("\string\\textbf{" .. item.title .. "}")
    tex.print("\\\\[0.1in]")
  end

  -- One " | l" per column plus the closing rule, built without a concat loop.
  tex.print("\string\\begin{tabular}{" .. string.rep(" | l", column_count) .. "|}")
  tex.print("\\hline")

  -- Header row: column names separated by alignment tabs.
  for ci = 1, column_count do
    tex.print(columns[ci].name)
    if ci < column_count then
      tex.print(" & ")
    end
  end
  tex.print("\\\\")
  tex.print("\\hline")

  -- Body rows: the data is stored column-wise, so walk it row by row.
  for row = 1, row_count do
    for ci = 1, column_count do
      tex.print(columns[ci]["values"][row])
      if ci < column_count then
        tex.print(" & ")
      end
    end
    tex.print("\\\\")
    tex.print("\\hline")
  end

  tex.print("\string\\end{tabular}")
  tex.print("\\\\[0.1in]")
end

--- Queue the LaTeX source for every entry of tab.tableOfContents.
-- Each entry produces a section, its subsections, and for every content item
-- either a text line or a tabular. The arrays are walked with ipairs so the
-- output follows the order of the JSON arrays.
-- @param tab parsed JSON document; reads tableOfContents, section.title,
--   subsection and content as laid out in the sample JSON
function renderSections(tab)
  for _, entry in ipairs(tab['tableOfContents']) do
    local sec_start = socket.gettime()
    tex.print("\string\\section{" .. entry.section.title .. "}")
    tex.print("section text here")
    for _, sub in ipairs(entry["subsection"]) do
      tex.print("\string\\subsection{" .. sub.title .. "}")
      tex.print("some text here")
      tex.print("\string\\newline")
      for _, item in ipairs(sub["content"]) do
        if item['type'] == "text" then
          tex.print(item["line"])
          tex.print("\string\\newline")
        elseif item["type"] == "table" then
          render_table(item)
        end
      end
    end
    -- NOTE(review): per the LuaTeX manual tex.print only queues input lines that
    -- TeX reads after the Lua call returns, so this timer presumably covers the
    -- queueing and not the typesetting itself -- confirm before relying on it.
    texio.write_nl("Time passed rendering section: " .. socket.gettime() - sec_start .. " s\string\n")
  end
end
\end{luacode}
% Title and author are read from the JSON document held in the Lua global tab.
\title{\directlua{tex.print(tab['documentName'])}}
\author{\directlua{tex.print(tab['author'])}}
% fancyhdr supplies the header and footer commands used further down.
\usepackage{fancyhdr}
% NOTE(review): nothing in the visible source uses pgffor -- confirm it is still needed.
\usepackage{pgffor}
% lastpage is loaded for the LastPage label referenced in the right footer.
\usepackage{lastpage}
%\usepackage[extreme]{savetrees}
% Landscape pages with one-inch margins.
\usepackage[landscape, margin=1in]{geometry}
% NOTE(review): no datetime command appears in the visible source -- confirm it is still needed.
\usepackage[yyyymmdd,hhmmss]{datetime}
\usepackage{hyperref}
% Link colours, PDF title, bookmarks and the viewer start-up mode.
\hypersetup{
colorlinks=true,
linkcolor=blue,
filecolor=magenta,
urlcolor=cyan,
pdftitle={AAA Report},
bookmarks=true,
pdfpagemode=FullScreen,
}
% https://en.wikibooks.org/wiki/LaTeX/Colors
\usepackage[svgnames]{xcolor}
% Use the fancy page style and clear every header/footer field before filling them in.
\pagestyle{fancy}
\fancyhf{}
% Header and footer texts come from the headerFooter object of the JSON document.
\lhead{\directlua{tex.print(tab['headerFooter']['header']['left'])}}
\rhead{\directlua{tex.print(tab['headerFooter']['header']['right'])}}
\lfoot{\directlua{tex.print(tab['headerFooter']['footer']['left'])}}
% Right footer: current page number followed by the page number of the LastPage label.
\rfoot{\thepage\ of \pageref{LastPage}}
\cfoot{}
% Heading text printed above the table of contents.
\renewcommand*\contentsname{TABLE OF CONTENTS}
\begin{document}
\maketitle
% Front-matter table listing the document metadata read from the JSON global tab.
\paragraph{Document Information}
\subparagraph{Document information is given in the table below}
% NOTE(review): the empty subparagraph looks like it is only there for spacing -- confirm.
\subparagraph{}
\begin{tabular}{|c| c| c|}
\hline
\textbf{S.No} & \textbf{Item} & \textbf{Information} \\
\hline
1 & Document ID & \directlua{tex.print(tab['documentId'])} \\
\hline
2 & Document Generated At & \directlua{tex.print(tab['documentGeneratedAt'])} \\
\hline
3 & Document Generated By & \directlua{tex.print(tab['documentGeneratedBy'])} \\
\hline
4 & AAA Protocol ID & \directlua{tex.print(tab['macProtocolID'])} \\
\hline
\end{tabular}
%\newpage
%\textcolor{NavyBlue}{TABLE OF CONTENTS}
\tableofcontents
% Hand the parsed JSON to renderSections, which prints the sections, subsections and tables.
\directlua{renderSections(tab)}
\end{document}
json.lua
这也是上述代码用来读取 JSON 文件的帮助程序(在同一目录中)。
local lpeg = assert(require("lpeg"))
local C, Cf, Cg, Ct, P, R, S, V =
lpeg.C, lpeg.Cf, lpeg.Cg, lpeg.Ct, lpeg.P, lpeg.R, lpeg.S, lpeg.V
-- Number parsing: optional sign, mantissa, optional exponent; the matched
-- text is handed to tonumber.
local digit = R("09")
local dot = P(".")
local eE = S("eE")
local sign = S("+-")^-1
local digits = digit^1
-- Ordered choice: "1." or "1.5" first, then ".5", then a bare integer.
local mantissa = (digits * dot * digit^0) + (dot * digits) + digits
local exponent = (eE * sign * digits)^-1
local real = (sign * mantissa * exponent) / tonumber
-- Optional whitespace: any run of spaces, tabs, newlines or carriage returns.
local ws = S(" \t\n\r")^0

--- Build a pattern for a literal string with optional whitespace on both sides.
-- @param str literal text to match
-- @return LPeg pattern
local function lit(str)
  local token = P(str)
  return ws * token * ws
end
--- Build a pattern that matches a literal string and captures a fixed value.
-- Leading whitespace is part of the captured match, trailing whitespace is
-- consumed afterwards.
-- @param str literal text to match
-- @param value value produced as the capture
-- @return LPeg pattern
local function attr(str, value)
  local keyword = ws * P(str)
  local constant = keyword / function() return value end
  return constant * ws
end
-- JSON grammar. The start rule is "object", so the top-level value must be a
-- JSON object.
local json = P{
  "object",

  -- Any JSON value; alternatives are tried in this order.
  value =
    V"null_value" +
    V"bool_value" +
    V"string_value" +
    V"real_value" +
    V"array" +
    V"object",

  -- The literal null, captured as nil.
  null_value =
    attr("null", nil),

  -- The literals true and false, captured as Lua booleans.
  bool_value =
    attr("true", true) + attr("false", false),

  -- A double-quoted string. A backslash always consumes the character after
  -- it, so an escaped backslash right before the closing quote no longer
  -- swallows the terminator. The text between the quotes is captured as-is;
  -- escape sequences are not decoded.
  string_value =
    ws * P'"' * C((P'\\' * 1 + (1 - S'"\\'))^0) * P'"' * ws,

  -- A number, converted with tonumber by the real pattern.
  real_value =
    ws * real * ws,

  -- Values collected into a sequence table; the comma after each value is
  -- optional.
  array =
    lit"[" * Ct((V"value" * lit","^-1)^0) * lit"]",

  -- One key/value pair, grouped so the fold below receives both captures
  -- together; the comma after it is optional.
  member_pair =
    Cg(V"string_value" * lit":" * V"value") * lit","^-1,

  -- Start from an empty table and rawset every member pair into it.
  object =
    lit"{" * Cf(Ct"" * V"member_pair"^0, rawset) * lit"}"
}
return {
  --- Parse JSON text whose top-level value is an object.
  -- The grammar is matched from the start of the text; anything that follows
  -- the object is not checked.
  -- @param str JSON text
  -- @return the decoded Lua table
  -- Raises an error with a descriptive message when the text does not match.
  parse = function(str)
    local result = json:match(str)
    if not result then
      -- Level 2 reports the caller's position rather than this helper's.
      error("json.parse: input is not a valid JSON object", 2)
    end
    return result
  end
}
我还做了一些分析:
- 读取 JSON 需要不到一秒
- 渲染章节和小节的每条语句(例如 tex.print ("\string\\section{" .. k.section.title .. "}"))大约耗时 1 微秒
我想了解时间都浪费在哪里了。我能做些什么来减少渲染时间?有没有更好的方法来组织我的 LaTeX 文档,以帮助减少总的生成时间?