因为我的导师不喜欢审阅 PDF 格式的论文,所以我必须找到一种办法,把我的 .tex 文件转换为 .doc 文件。但是,许多论坛上给出的解决方案超出了我的水平,所以我使用以下方法来处理这个问题。
- 使用 LaTeX 生成 PDF;
- 使用可靠的 PDF 转换器将 PDF 转换为 Word。
但生成的 Word 文件存在很多问题,例如:有些行末的单词被连字符 - 拆开(对于这个问题,我使用 \usepackage[none]{hyphenat} 来解决);段落被意外地拆分成了几个段落。为此,我想用 LaTeX 在 PDF 文件中生成一个段落标记,例如在每个段落的末尾添加 _ppp_ 。有了这个标记,我就可以在 Office Word 中轻松地把 _ppp_ 替换为普通的段落符。问题是:该如何实现?
而且,我还想通过添加标记(例如 _h1_ 或 _h2_ )来标明节和/或小节的标题。
我只是想知道如何在段落(paragraph)末尾添加一些内容。就像这样:
第 1 节_h1_
第 1 小节_h2_
段落内容,段落内容,段落内容,段落内容,段落内容,段落内容,段落内容,段落内容,段落内容。_ppp_
段落内容,段落内容,段落内容,段落内容,段落内容,段落内容,段落内容,段落内容,段落内容。_ppp_
第 1 节_h1_
第 2 小节_h2_
段落内容,段落内容,段落内容,段落内容。_ppp_
答案1
如果您懂一点 VBA,可以逐行遍历文本文件:当找到 \section{...} 时,把 {} 中的内容设为不同的样式(或者只是加大字号并加粗),并在其后附加 _h1_ 。我正在为目前撰写的论文做这件事。
以下代码是从我的 Word 2003 的 normal.dot 中粘贴出来的,并删除了一些个人内容。您需要在 VBA 项目中添加对 Microsoft 正则表达式(Microsoft VBScript Regular Expressions)库的引用。代码没有尝试处理复杂的数学公式或表格,但如果图片以 .png 格式存在,则会加载它们。我发现对于正文、标题等内容,在需要协作编辑时,这比 pandoc 或我尝试过的任何其他转换器效果都好。每当我在 .tex 源文件中用到新命令时,我就往代码里继续添加相应的处理(因此其中对 acronym 宏包的部分支持已经过时了)。
Option Explicit
Option Base 0
'ImportTexFile shows Word's File > Open dialog and then saves the active
'document under a new name: the current full name minus its last four
'characters, followed by "-import.doc", in Word document format.
'If the dialog's Show method returns anything other than -1, all macro
'execution is terminated with End, so callers do not continue either.
Sub ImportTexFile()
    Dim strSource As String
    Dim strTarget As String

    If Application.Dialogs(wdDialogFileOpen).Show <> -1 Then
        End
    End If

    'drop the trailing four characters (presumably the ".tex" extension) before adding the suffix
    strSource = ActiveDocument.FullName
    strTarget = Left(strSource, Len(strSource) - 4) & "-import.doc"

    ActiveDocument.SaveAs _
        FileName:=strTarget, _
        FileFormat:=wdFormatDocument, _
        LockComments:=False, _
        Password:="", _
        AddToRecentFiles:=True, _
        WritePassword:="", _
        ReadOnlyRecommended:=False, _
        EmbedTrueTypeFonts:=False, _
        SaveNativePictureFormat:=False, _
        SaveFormsData:=False, _
        SaveAsAOCELetter:=False
End Sub
'ReadTeX reads in a .TeX file and tries to interpret it.
' make sure graphics are available in .png, e.g. use imagemagick:
'FOR %a in (*.pdf) DO convert -density 300 "%~a" "%~na.png"
'
'Requires a project reference that provides the RegExp class.
'Steps:
'  1. ImportTexFile opens the file and saves it under a new name.
'  2. Everything outside \begin{document}..\end{document} is discarded, but
'     \newacro{key}{long form} lines found there are collected in strAcro.
'  3. Each line is cleaned with regular expressions (simple commands,
'     citations, % comments, figure placement options).
'  4. The document is walked character by character via the Selection so
'     that commands needing formatting (sections, emphasis, pictures,
'     Greek letters, acronyms, super/subscripts) can be applied in place.
'  5. The document is saved.
Sub ReadTeX()
    Dim strFName As String
    Dim oReg As New RegExp
    Dim strLineArr() As String
    Dim strText As String
    Dim strtemp As String
    Dim iLine As Long 'Long rather than Integer: a source file may exceed 32767 paragraphs
    Dim bInBody As Boolean
    Dim bTempBody As Boolean
    Dim strArg As String, strCom As String
    Dim bInMath As Boolean
    Dim oPic As InlineShape
    Dim bKillComment As Boolean 'delete comments? alternative is to grey out
    Dim UcodeBase As Integer
    Dim iGreek As Integer
    Dim strAcro() As String

    bKillComment = True
    'strText = ActiveDocument.Range.Text
    ImportTexFile
    strFName = ActiveDocument.FullName
    strLineArr = Split(ActiveDocument.Content.Text, Chr(13))
    setOptions
    'strAcro(0, n) = acronym key, (1, n) = long form, (2, n) = "True"/"False" already spelt out
    ReDim strAcro(2, 1) As String

    'throw away all preamble and anything after \end(document)
    bInBody = False
    For iLine = 0 To UBound(strLineArr)
        'bTempBody carries the state for the NEXT line, so the \begin{document}
        'line itself is still treated as preamble and blanked
        bTempBody = bInBody
        If Left(strLineArr(iLine), 16) = "\begin{document}" Then bTempBody = True
        If Left(strLineArr(iLine), 14) = "\end{document}" Then
            bTempBody = False
            bInBody = False
        End If
        If Not bInBody Then
            If Left(Trim(strLineArr(iLine)), 8) = "\newacro" Then
                'parse \newacro{key}{long form}
                strtemp = Trim(strLineArr(iLine))
                strtemp = Right(strtemp, Len(strtemp) - 8)
                ReDim Preserve strAcro(2, UBound(strAcro, 2) + 1) As String
                strAcro(0, UBound(strAcro, 2)) = Mid(strtemp, 2, InStr(strtemp, "}") - 2)
                strAcro(1, UBound(strAcro, 2)) = _
                    Mid(strtemp, 4 + Len(strAcro(0, UBound(strAcro, 2))), InStr(4 + Len(strAcro(0, UBound(strAcro, 2))), strtemp, "}") - 4 - Len(strAcro(0, UBound(strAcro, 2))))
                strAcro(2, UBound(strAcro, 2)) = "False" ' has this acronym been spelt out yet
            End If
            strLineArr(iLine) = ""
        End If
        bInBody = bTempBody
    Next

    Application.ScreenUpdating = False
    'rebuild the document from the surviving (non-empty) lines
    ActiveDocument.Range.Text = ""
    For iLine = 0 To UBound(strLineArr)
        If Len(strLineArr(iLine)) > 0 Then ActiveDocument.Range.Text = ActiveDocument.Range.Text & strLineArr(iLine)
    Next
    Application.ScreenRefresh
    'display after removal of preamble etc.
    Application.ScreenUpdating = False

    'line-by-line cleanup of some common commands and some of my stuff - anything that doesn't need formatting
    'NB: a literal backslash, brace or square bracket must be escaped in a RegExp pattern
    strLineArr = Split(ActiveDocument.Content.Text, Chr(13))
    oReg.Global = True
    oReg.Multiline = False
    For iLine = 0 To UBound(strLineArr)
        strText = strLineArr(iLine)
        '\slash
        oReg.Pattern = "\\slash(\{\}|\s)?"
        strText = oReg.Replace(strText, "/")
        '\hyp (hyphen)
        oReg.Pattern = "\\hyp((\{\})|\s)?"
        strText = oReg.Replace(strText, "-")
        '~ (nbsp)'regex is overkill here, but never mind
        oReg.Pattern = "~"
        strText = oReg.Replace(strText, Chr(160))
        '--- (emdash)'regex is overkill here, but never mind
        oReg.Pattern = "---"
        strText = oReg.Replace(strText, ChrW(&H2014))
        '-- (endash)'regex is overkill here, but never mind
        oReg.Pattern = "--"
        strText = oReg.Replace(strText, ChrW(&H2013))
        '\cite, \citeauthor (slightly cleverer) - square brackets around citation key
        oReg.Pattern = "\\cite(author|numns)?\{([\w, \?\-_\[\]]*)\}"
        strText = oReg.Replace(strText, "[$2]")
        '\author, email, affiliation - just return bare
        oReg.Pattern = "\\(author|email|affiliation)\{([\w.,@ \?]*)\}"
        strText = oReg.Replace(strText, "$2")
        '\maketitle - just remove
        oReg.Pattern = "\\maketitle"
        strText = oReg.Replace(strText, "")
        'comments starting with % - running to end of line - but only if % isn't preceded by \
        'the character in front of the % is captured and put back so it is not lost
        oReg.Pattern = "(^|[^\\])%.*$"
        strText = oReg.Replace(strText, "$1")
        'remove figure placement commands such as [htbp]
        oReg.Pattern = "\[[htbp!H]+\]"
        strText = oReg.Replace(strText, "")
        'my stuff
        '\um{} to give micron
        oReg.Pattern = "\\um(\{\})?"
        strText = oReg.Replace(strText, Chr(181) & "m")
        '\textdeg{} to give degree
        oReg.Pattern = "\\textdeg(\{\})?"
        strText = oReg.Replace(strText, "°")
        '\textasciitilde{} to give tilde
        oReg.Pattern = "\\textasciitilde(\{\})?"
        strText = oReg.Replace(strText, "~")
        strLineArr(iLine) = strText
    Next

    ActiveDocument.Range.Text = ""
    For iLine = 0 To UBound(strLineArr)
        If Len(strLineArr(iLine)) > 0 Then ActiveDocument.Range.Text = ActiveDocument.Range.Text & strLineArr(iLine)
    Next

    'start from a uniform base format
    Selection.HomeKey unit:=wdStory
    Selection.EndKey unit:=wdStory, Extend:=True
    Selection.Style = "Normal"
    Selection.ParagraphFormat.Alignment = wdAlignParagraphJustify

    'now it's easiest to walk the document to allow formatting
    bInMath = False
    Selection.HomeKey unit:=wdStory
    Do Until ActiveDocument.Bookmarks("\Sel").End = ActiveDocument.Bookmarks("\EndOfDoc").End
        'step forward and select exactly one character
        Selection.MoveRight unit:=wdCharacter, Count:=1
        Selection.MoveRight unit:=wdCharacter, Count:=1, Extend:=True
        If Selection.Text = "\" Then 'here's where it gets interesting
            Selection.MoveRight unit:=wdCharacter, Count:=1, Extend:=True
            If InStr("#$%&^_{}~", Right(Selection.Text, 1)) Then 'first deal with simple escaped characters
                Selection.Text = Right(Selection.Text, 1)
            ElseIf Selection.Text = "\\" Then 'and forced line breaks (two characters are selected here)
                Selection.Text = Chr(13)
            Else
                'select the rest of the command word, remove it and fetch its {argument}
                Selection.MoveRight unit:=wdWord, Count:=1, Extend:=True
                strCom = Trim(Selection.Text)
                ' If strCom = "\Wmk" Then Stop
                Selection.Delete
                strArg = CaptureBraces
                Select Case strCom
                    Case "\comment": 'block comments in the code - either delete or grey-out
                        Selection.Delete
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg), Extend:=True
                        If bKillComment Then
                            Selection.Delete
                        Else
                            Selection.Font.Color = wdColorGray40
                            Selection.Font.Size = Selection.Font.Size - 2
                            Selection.MoveRight unit:=wdCharacter, Count:=1
                        End If
                        Selection.Delete
                    Case "\warn": 'make a word comment from a note to self
                        Selection.MoveRight unit:=wdCharacter, Count:=1, Extend:=True
                        Selection.Delete
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg) + 1, Extend:=True
                        Selection.Delete
                        Selection.Comments.Add Range:=Selection.Range
                        Selection.TypeText Text:=strArg
                        Selection.EscapeKey
                        If ActiveWindow.ActivePane.Index > 1 Then ActiveWindow.ActivePane.Close
                        'not yet working through the argument of this one
                    Case "\section":
                        Selection.Delete
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg), Extend:=True
                        Selection.Font.Bold = True
                        Selection.Font.Size = Selection.Font.Size + 4
                        Selection.MoveRight unit:=wdCharacter, Count:=1
                        Selection.Delete
                        Selection.MoveLeft unit:=wdCharacter, Count:=Len(strArg) 'need to work through the argument
                    Case "\subsection":
                        Selection.Delete
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg), Extend:=True
                        Selection.Font.Bold = True
                        Selection.Font.Size = Selection.Font.Size + 2
                        Selection.MoveRight unit:=wdCharacter, Count:=1
                        Selection.Delete
                        Selection.MoveLeft unit:=wdCharacter, Count:=Len(strArg) 'need to work through the argument
                    Case "\ref": 'do nothing for now - leave as ref for cross referencing later
                    Case "\emph":
                        Selection.MoveRight unit:=wdCharacter, Count:=1, Extend:=True
                        Selection.Delete
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg), Extend:=True
                        Selection.Font.Italic = True
                        Selection.MoveRight unit:=wdCharacter, Count:=1
                        Selection.Delete
                        Selection.MoveLeft unit:=wdCharacter, Count:=Len(strArg) + 1 'need to work through the argument
                    Case "\title":
                        Selection.MoveRight unit:=wdCharacter, Count:=1, Extend:=True
                        Selection.Delete
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg), Extend:=True
                        Selection.Font.Bold = True
                        Selection.Font.Size = Selection.Font.Size + 8
                        Selection.MoveRight unit:=wdWord, Count:=1
                        Selection.Delete
                        Selection.MoveLeft unit:=wdCharacter, Count:=Len(strArg)
                    Case "\caption":
                        Selection.MoveRight unit:=wdCharacter, Count:=1, Extend:=True
                        Selection.Delete
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg), Extend:=True
                        Selection.Font.Size = Selection.Font.Size - 2
                        Selection.ParagraphFormat.Alignment = wdAlignParagraphCenter
                        Selection.ParagraphFormat.LeftIndent = Selection.ParagraphFormat.LeftIndent + InchesToPoints(0.5)
                        Selection.ParagraphFormat.RightIndent = Selection.ParagraphFormat.RightIndent + InchesToPoints(0.5)
                        Selection.MoveRight unit:=wdCharacter, Count:=1
                        Selection.Delete
                        Selection.MoveLeft unit:=wdCharacter, Count:=Len(strArg) + 1 'need to work through the caption
                    Case "\label": 'do nothing - keep the label in braces
                    Case "\begin":
                        'only these environments have their \begin{...} removed; others keep the braced name
                        Select Case strArg
                            Case "abstract", "figure":
                                Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg) + 2, Extend:=True
                                Selection.Delete
                        End Select
                    Case "\end":
                        Select Case strArg
                            Case "abstract", "figure":
                                Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg) + 2, Extend:=True
                                Selection.Delete
                        End Select
                    Case "\si" 'siunitx package - try to interpret units, or just leave as text
                        Selection.Delete
                        'Selection.Delete
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg)
                        Selection.Delete
                        Selection.MoveLeft unit:=wdCharacter, Count:=Len(strArg)
                    Case "\textsuperscript":
                        doSuper (strArg)
                    Case "\textsubscript":
                        doSub (strArg)
                    Case "\bibliographystyle", "\bibliography": 'stuf to just wipe out
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg) + 2, Extend:=2
                        Selection.Delete
                    Case "\includegraphics":
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg) + 2, Extend:=True
                        Selection.Delete
                        'keep the shape returned by AddPicture so the picture that was just
                        'inserted is the one that gets resized (the collection is 1-based)
                        Set oPic = Selection.InlineShapes.AddPicture(FileName:=ActiveDocument.Path & Application.PathSeparator & Trim(strArg) & ".png", LinkToFile:=False, SaveWithDocument:=True)
                        With oPic
                            'scale to 3.5 inches wide, height in proportion
                            .Height = InchesToPoints(3.5) * .Height / .Width
                            .Width = InchesToPoints(3.5)
                        End With
                    'Math mode stuff - can test bInMath (see below)
                    Case "\leq":
                        Selection.InsertBefore (ChrW(8804))
                    Case "\times":
                        Selection.InsertBefore (ChrW(215))
                    'greek letters
                    Case "\alpha", "\Alpha", "\beta", "\Beta", "\gamma", "\Gamma", "\delta", "\Delta", "\epsilon", "\Epsilon", "\zeta", "\Zeta", "\eta", "\Eta", "\theta", "\Theta", "\iota", "\Iota", "\kappa", "\Kappa", "\lambda", "\Lambda", "\mu", "\Mu", "\nu", "\Nu", "\xi", "\Xi", "\omicron", "\Omicron", "\pi", "\Pi", "\rho", "\Rho", "\sigma", "\Sigma", "\tau", "\Tau", "\upsilon", "\Upsilon", "\phi", "\Phi", "\chi", "\Chi", "\psi", "\Psi", "\omega", "\Omega":
                        If LCase(Mid(strCom, 2, 1)) = Mid(strCom, 2, 1) Then 'lower case
                            UcodeBase = 945 'code point of lower-case alpha
                        Else 'upper case
                            UcodeBase = 913 'code point of upper-case alpha
                        End If
                        strCom = LCase(strCom)
                        'FindGreek gives a 1-based position (alpha = 1 ... omega = 24).
                        'alpha..rho sit at base + position - 1; Unicode then has one extra
                        'code point (final sigma / unassigned) so sigma..omega sit at base + position.
                        iGreek = FindGreek(Mid(strCom, 2))
                        If iGreek <= 17 Then iGreek = iGreek - 1
                        Selection.InsertAfter ChrW(UcodeBase + iGreek)
                    Case "\acl", "\acp", "\acf", "\acfp", "\ac", "\acp", "\acs", "\acsp", "\aclp":
                        'acronyms
                        Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg) + 2, Extend:=True
                        Selection.Delete
                        For iLine = LBound(strAcro, 2) To UBound(strAcro, 2)
                            If strAcro(0, iLine) = strArg Then Exit For
                        Next
                        If iLine > UBound(strAcro, 2) Then
                            'key was never declared with \newacro - put the bare key back
                            Selection.InsertBefore (strArg)
                        Else
                            Select Case strCom
                                Case "\acs":
                                    Selection.InsertBefore (strAcro(0, iLine))
                                Case "\acsp":
                                    Selection.InsertBefore (strAcro(0, iLine)) & "s"
                                Case "\acl":
                                    Selection.InsertBefore (strAcro(1, iLine))
                                Case "\aclp":
                                    Selection.InsertBefore (strAcro(1, iLine)) & "s"
                                Case "\acf":
                                    Selection.InsertBefore (strAcro(1, iLine)) & " (" & (strAcro(0, iLine)) & ")"
                                Case "\acfp":
                                    Selection.InsertBefore (strAcro(1, iLine)) & "s (" & (strAcro(0, iLine)) & "s)"
                                Case "\ac":
                                    'short form once the acronym has been used, long form the first time
                                    If strAcro(2, iLine) = True Then
                                        Selection.InsertBefore (strAcro(0, iLine))
                                    Else
                                        Selection.InsertBefore (strAcro(1, iLine))
                                        strAcro(2, iLine) = True
                                    End If
                                Case "\acp":
                                    If strAcro(2, iLine) = True Then
                                        Selection.InsertBefore (strAcro(0, iLine)) & "s"
                                    Else
                                        Selection.InsertBefore (strAcro(1, iLine)) & "s"
                                        strAcro(2, iLine) = True
                                    End If
                            End Select
                        End If
                    Case "\acresetall":
                        For iLine = LBound(strAcro, 2) To UBound(strAcro, 2)
                            strAcro(2, iLine) = "False"
                        Next
                    Case Else
                        'unknown command: break into the debugger so a handler can be added
                        Stop
                        Debug.Print strCom
                        'just put strArg back
                        If Selection = "{" Then
                            Selection.MoveRight unit:=wdCharacter, Count:=Len(strArg) + 2, Extend:=True
                            Selection.Delete
                        End If
                        Selection.InsertBefore (strArg)
                End Select
            End If
        End If
        If Selection.Text = "$" Then
            'toggle math mode and drop the delimiter
            bInMath = Not (bInMath)
            Selection.Delete
            Selection.MoveLeft unit:=wdCharacter, Count:=1
        End If
        If Selection.Text = "^" Then 'superscript
            Selection.Delete
            strArg = CaptureBraces
            doSuper (strArg)
        End If
        'If Selection.Text = "" Then Stop
        'NOTE(review): this branch applies doSub, so the tested character was presumably "_"
        'and may have been lost when the code was published - confirm before relying on it
        If Selection.Text = "" And bInMath Then 'superscript (in math mode)
            Selection.Delete
            strArg = CaptureBraces
            doSub (strArg)
        End If
    Loop 'walking doc

    'finish
    Application.ScreenUpdating = True
    Application.StatusBar = False
    ActiveDocument.Save
End Sub
'FindGreek returns the 1-based position of searchstring in the list of the
'24 lower-case Greek letter names ("alpha" = 1 ... "omega" = 24).
'The comparison is exact; 0 is returned when the name is not in the list.
Function FindGreek(searchstring As String) As Integer
    Dim vNames As Variant
    Dim idx As Integer

    'Split always yields a zero-based array, hence the + 1 below
    vNames = Split("alpha,beta,gamma,delta,epsilon,zeta,eta,theta,iota,kappa,lambda,mu," & _
                   "nu,xi,omicron,pi,rho,sigma,tau,upsilon,phi,chi,psi,omega", ",")

    FindGreek = 0
    For idx = 0 To UBound(vNames)
        If vNames(idx) = searchstring Then
            FindGreek = idx + 1
            Exit Function
        End If
    Next idx
End Function
'doSuper formats text at the current selection as superscript.
'  strX - the braced argument previously captured; when it is non-empty the
'         next Len(strX) characters are formatted and the character after
'         them is deleted, otherwise only the next single character is formatted.
'A selected "{" is deleted first. Superscript is switched off again afterwards
'and the cursor is moved one character to the left.
Sub doSuper(strX As String)
    With Selection
        If .Text = "{" Then .Delete

        If Len(strX) > 0 Then
            'bracketed expression
            .MoveRight unit:=wdCharacter, Count:=Len(strX), Extend:=True
            .Font.Superscript = True
            .MoveRight unit:=wdCharacter, Count:=1
            .Delete
        Else
            'single char
            .MoveRight unit:=wdCharacter, Count:=1, Extend:=True
            .Font.Superscript = True
            .MoveRight unit:=wdCharacter, Count:=1
        End If

        .Font.Superscript = False
        .MoveLeft unit:=wdCharacter, Count:=1
    End With
End Sub
'doSub formats text at the current selection as subscript.
'  strX - the braced argument previously captured; when it is non-empty the
'         next Len(strX) characters are formatted and the character after
'         them is deleted, otherwise only the next single character is formatted.
'A selected "{" is deleted first. Subscript is switched off again afterwards
'and the cursor is moved one character to the left.
Sub doSub(strX As String)
    With Selection
        If .Text = "{" Then .Delete

        If Len(strX) > 0 Then
            'bracketed expression
            .MoveRight unit:=wdCharacter, Count:=Len(strX), Extend:=True
            .Font.Subscript = True
            .MoveRight unit:=wdCharacter, Count:=1
            .Delete
        Else
            'single char
            .MoveRight unit:=wdCharacter, Count:=1, Extend:=True
            .Font.Subscript = True
            .MoveRight unit:=wdCharacter, Count:=1
        End If

        .Font.Subscript = False
        .MoveLeft unit:=wdCharacter, Count:=1
    End With
End Sub
'CaptureBraces returns the text between the "{" at the current selection and
'its matching "}" (nested braces are counted), without the outer braces.
'The starting selection is remembered in the bookmark "CJH_cap_brace" and
're-selected before returning, so the braced text itself is left in place.
'Returns "" when the text at the selection does not start with "{" (in that
'case the selection is left where the checks moved it), or when the closing
'brace is never found before the selection can no longer be extended.
Function CaptureBraces()
    Dim strRet As String
    Dim iBraceCount As Integer
    Dim bBalanced As Boolean

    iBraceCount = 0
    bBalanced = False
    CaptureBraces = ""
    ActiveDocument.Bookmarks.Add Name:="CJH_cap_brace"
    'skip one space between the command and its argument
    If Selection.Text = " " Then Selection.MoveRight unit:=wdCharacter, Count:=1
    If Selection.Text = "" Then Selection.MoveRight unit:=wdCharacter, Count:=1, Extend:=1
    If Selection.Text <> "{" Then
        Exit Function
    End If
    'iBraceCount = 1
    Do
        'MoveRight returns 0 when the selection could not be extended (end of
        'document); stop instead of looping forever on unbalanced braces
        If Selection.MoveRight(unit:=wdCharacter, Count:=1, Extend:=1) = 0 Then Exit Do
        Select Case Right(Selection.Text, 1)
            Case "{": iBraceCount = iBraceCount + 1
            Case "}": iBraceCount = iBraceCount - 1
            Case Else
        End Select
        bBalanced = (iBraceCount = 0)
    Loop Until bBalanced

    If bBalanced Then
        'strip the first and last selected characters (the outer braces)
        strRet = Mid(Selection.Text, 2, Len(Selection.Text) - 2)
    Else
        strRet = ""
    End If
    ActiveDocument.Bookmarks("CJH_cap_brace").Select
    CaptureBraces = strRet
End Function
'setOptions turns off Word's PasteAdjustWordSpacing option.
Sub setOptions()
    Options.PasteAdjustWordSpacing = False
End Sub