Lua 可以实现很多很酷的事情,例如利用段落回调来辅助手动调整孤行/寡行(widow/orphan)。
在最终版本中,我经常“手动”检查分页符前的最后一行是否没有连字符。可以通过一些 Lua 魔法自动报告此问题吗?如何实现?
或者更好的是,Lua 是否可以将带连字符的单词放入 \hbox 并重新对段落断行 - 希望段落仍会在同一个地方断开,而该单词只是向前或向后移动一点?
答案1
除了将带连字符的单词放入 \hbox 之外(那样做需要实际确定单词的开始和结束位置),还有另一种方法:每当页面在 discretionary 节点(即 TeX 中代表可能的连字符断点的节点)处断开时,重新对该段落断行,并附加一个约束:不允许在此处断行。这很容易,因为 Lua 代码可以更改 discretionary 节点上与断行相关的惩罚值(penalty)。当然,重新断行之后,新的分页点可能仍然落在某个 discretionary 节点上。这时我们只需重复该过程,直到对分页点感到满意为止。
这还没有用大量文档进行测试,因此它可能会在更复杂的情况下出现故障。
代码(注释中有一些解释):(将 \includecomment 替换为 \excludecomment 即可禁用这段代码,从而查看“正常”断行的效果。)
\documentclass{article}
\usepackage[english]{babel}
\usepackage{blindtext,luacode}
\usepackage{comment}
% Toggle for the whole hyphen-avoidance code, which is wrapped in the
% `nobroken' environment: activate \excludecomment instead of \includecomment
% to disable the code and compare against the unmodified page breaks.
% \excludecomment{nobroken}
\includecomment{nobroken}
\begin{nobroken}
\begin{luacode*}
-- Set of node type ids (glue, kern, penalty) that are ignored when searching
-- for the last relevant node of a line.
local discardable_id = {}
for _, type_name in ipairs{'glue', 'kern', 'penalty'} do
  discardable_id[node.id(type_name)] = true
end
-- A small helper to use node properties. They are used to store the paragraphs without linebreaks.
-- Reads and optionally replaces the `nohyphout` entry in the property table of node `n`.
--   n:   the node whose property is accessed
--   new: the value to store; `false` leaves the stored value untouched (pure read),
--        `nil` clears it
-- Returns the value that was stored before the call (or nil if there was none).
local function swap_prop(n, new)
  local p = node.getproperty(n)
  if not p then
    -- Nothing is stored on this node. A read (`false`) or a clear (`nil`) has
    -- nothing to do, so do not allocate a property table just to look at it.
    if not new then return nil end
    p = {}
    node.setproperty(n, p)
  end
  local prop = p.nohyphout
  -- An explicit test instead of `new == false and prop or new`: that idiom
  -- wrote `false` into the slot whenever nothing had been stored before.
  if new ~= false then
    p.nohyphout = new
  end
  return prop
end
-- Pre-linebreak filter that stores an unbroken copy of the paragraph.
-- Only lists from the main vertical list (empty context string) whose first
-- node has id 9 are handled; everything else passes through untouched.
-- The copy and a table mapping each original discretionary (keyed by its
-- direct node) to its copy are stored in the property of the head node.
-- Always returns true, i.e. the list itself is never modified.
local function pre(n, ctxt)
  local PAR_START_ID, DISC_ID = 9, 7
  if ctxt == "" and n.id == PAR_START_ID then
    local copy = node.copy_list(n)
    local disc_map = {}
    swap_prop(n, {unbroken = copy, discretionaries = disc_map})
    -- Walk the original and the copy in lockstep. Only discretionaries are
    -- recorded, since no other node is ever changed later on.
    local orig = n
    while orig do
      if orig.id == DISC_ID then
        disc_map[node.direct.todirect(orig)] = copy
      end
      orig, copy = orig.next, copy.next
    end
  end
  return true
end
-- Of course, saving the unbroken paragraph only works before linebreaking, so `pre` is registered as a pre_linebreak_filter.
-- After linebreaking, the `post` filter defined below saves the last node of the broken paragraph. This allows deleting "wrong" versions later on.
luatexbase.add_to_callback("pre_linebreak_filter", pre, "save unbroken")
-- Post-linebreak filter: remembers where the broken paragraph ends.
--   n:    head of the list produced by the line breaker
--   ctxt: context string of the callback; only the empty context is handled
-- If the first line starts with the node on which `pre` stored its record,
-- the tail of the broken list is saved in that record as `parend`.
-- Always returns true, i.e. the list itself is never modified.
local post = function(n, ctxt)
  if ctxt ~= "" then return true end
  -- There often comes glue/penalties/other stuff in front of the first actual line
  while n and n.id ~= 0 do n = n.next end
  -- The loop above may run off the end of the list; without this guard the
  -- access to `n.head` below would raise an error for a list without any line.
  if not n then return true end
  local head = n.head
  if not head or head.id ~= 9 then return true end -- Nothing to see here, but at least unlikely
  local prop = swap_prop(head, false) -- `false`: only read the record, keep it stored
  if prop and prop.unbroken then
    prop.parend = node.tail(n)
  end
  return true
end
luatexbase.add_to_callback("post_linebreak_filter", post, "save parend")
-- The important functionality: We define a macro \adjustbreaks, which looks at the output box (box 255).
-- If the page does not end with a hyphen, the first of the two following TeX arguments is selected (via \@firstoftwo).
-- Otherwise the affected paragraph is broken again with that break forbidden, the new lines replace the old ones
-- in box 255, and the second argument is selected (via \@secondoftwo).
local luafunc = luatexbase.new_luafunction"adjustbreaks"
token.set_lua("adjustbreaks", luafunc, "global", "protected")
lua.get_functions_table()[luafunc] = function()
local list = tex.box[255]
-- `last` is the record stored by `pre`/`post` for the paragraph that is still open at the current position,
-- `lastline` is the most recent line (hlist) seen while such a paragraph is open.
local last, lastline
for hlist in node.traverse(list.head) do
-- A line (id 0) whose first node has id 9 starts a paragraph.
if hlist.id == 0 and hlist.head and hlist.head.id == 9 then
-- A new paragraph starts while the previous one is still tracked: its unbroken copy is no longer needed.
if last and last.unbroken then
node.flush_list(last.unbroken)
last = nil
end
-- Fetch the stored record and remove it from the node at the same time.
local new = swap_prop(hlist.head, nil)
if new and new.unbroken then
last, new.parbegin = new, hlist
end
end
if hlist.id == 0 and last then
-- The paragraph ends on this page, so there is nothing to fix: drop the unbroken copy.
if last.parend == hlist then
node.flush_list(last.unbroken)
last = nil
end
lastline = hlist
end
end
if last then -- We reached the end of a page and it ends with a broken paragraph which we *could* change.
-- Find the last node of the last line that is not glue, kern or penalty.
local lastnondiscard
for n in node.traverse(lastline.head) do
if not discardable_id[n.id] then
lastnondiscard = n
end
end
if lastnondiscard and lastnondiscard.id == 7 then -- We end with a discretionary. Let's change that!
-- This is the interesting part:
-- We probably never reach this point if the paragraph isn't fully "contributed" to the outer vlist,
-- so last.parend is in the list starting at tex.lists.contrib_head
-- We want to remove the already broken paragraph, so reset the head to the node *after* our last line.
-- NOTE(review): if last.parend is not in the contribution list, this loop runs past the end of the list - confirm the assumption above.
do
local curr, afterpar = tex.lists.contrib_head, last.parend.next
while curr ~= afterpar do curr = node.free(curr) end
tex.lists.contrib_head = curr
end
-- Also forbid the current linebreak, otherwise this wouldn't do anything
-- (the penalty is set on the copy of the discretionary inside the unbroken list)
last.discretionaries[node.direct.todirect(lastnondiscard)].penalty = 10000
-- We might need multiple passes and after linebreaking the `unbroken` list will no longer be unbroken,
-- So we run our callbacks again to fix everything up. Then run the linebreaking system again.
-- We should probably set some parameters, but to get them we have to add a `linebreak_filter` callback.
-- Currently I want to avoid that. FIXME!
pre(last.unbroken, "")
local broken = tex.linebreak(last.unbroken) -- TODO: Get params
post(broken, "")
-- Now broken starts probably with some penalties or glue.
-- But we never deleted the glue in front of our paragraph, so adding it again would double it.
-- Instead just skip it:
while broken and broken.id ~= 0 do broken = node.free(broken) end
-- Splice the new lines in where the old paragraph started and free the old lines.
-- NOTE(review): this assumes that a node precedes the first line of the paragraph in box 255 - confirm.
last.parbegin.prev.next = broken
node.flush_list(last.parbegin)
token.put_next(token.create'@secondoftwo')
return
else
-- The page does not end with a discretionary: nothing to change, drop the unbroken copy.
node.flush_list(last.unbroken)
last = nil
end
end
token.put_next(token.create'@firstoftwo')
end
\end{luacode*}
% Now we still have to change the output routine to actually use \adjustbreaks
% First save the old one: \myrealoutput becomes the name of the primitive output
% routine register, while \output is redefined as a freshly allocated token
% register which receives a copy of the current output routine.
\let\myrealoutput\output
\newtoks\output
\output\expandafter{\the\myrealoutput}
% Now set the new output routine. If \adjustbreaks does not find any problems with the current page, just continue with the regular output routine.
% Otherwise put \outputbox back into the page, then TeX tries again to find a pagebreak.
\myrealoutput{\adjustbreaks{\the\output}{\unvbox255}}
\end{nobroken}
\begin{document}
% Test document: filler text plus a paragraph with a very long word, so that
% hyphenated line ends near page breaks are likely.
% Note: the remark after \hyphenpenalty0 must be a TeX comment (%); with `--'
% it would be typeset as the beginning of the first paragraph.
\hyphenpenalty0 % This is only for testing, you normally do NOT want \hyphenpenalty0 in actual documents.
\blindtext[5]
The quick brown fox jumps over the lazy dog.
Jackdaws love my big Sphinx of Quartz.
Pack my box with five dozen liquor jugs.
The five boxing wizards jump quickly.
Sympathizing would fix Quaker objectives.
Many-wived Jack laughs at probes of sex quiz.
% Turgid saxophones blew over Mick’s jazzy quaff.
% Playing jazz vibe chords quickly excites my wife.
% A large fawn jumped quickly over white zinc boxes.
% Exquisite farm wench gives body jolt to prize stinker.
According to a list of long words on grammerly.com, one of the best known long words in the english language is a word which the Oxforddictionary describes as ``a nonsense word''. Therefore it fits perfectly into this supercalifragilisticexpialidocious text.
\blindtext
\end{document}