我有一批已填写好的 Word 表单,需要把其中的数据导入 Excel、CSV 或其他任何结构化格式。我在网上看到过逐个文件处理的解决方案,但有没有批量处理的成熟方法?
在动手编写 PowerShell 脚本之前,我想先问一下。
答案1
以下是我用 Python 给出的解决方案:
"""
Copyright 2009 Konrads Smelkovs <[email protected]>
UTF8Recoder and UnicodeWriter come from the Python docs
"""
import sys,os,csv
import win32com.client
import pywintypes
import codecs, cStringIO
class UTF8Recoder:
    """Iterator that reads an encoded stream and re-encodes its input to UTF-8.

    Args:
        f: Stream to read from; it is wrapped in the ``codecs`` stream
            reader registered for *encoding*.
        encoding: Name of the codec the stream's content is encoded in.
    """

    def __init__(self, f, encoding):
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        """Return the next item produced by the reader, encoded as UTF-8.

        ``StopIteration`` raised by the underlying reader propagates
        unchanged, which ends iteration over this object.
        """
        # The built-in next() works with both the Python 2 (.next) and the
        # Python 3 (.__next__) iterator protocol of the wrapped reader.
        return next(self.reader).encode("utf-8")

    # Python 3 looks for __next__ when iterating; keep next() for Python 2.
    __next__ = next
class UnicodeWriter:
    """A CSV writer which writes rows to the file-like object "f",
    encoded in the given encoding.

    Each row is first formatted by ``csv.writer`` into an in-memory queue,
    then re-encoded into the target encoding and written to "f".

    Args:
        f: Output stream; it receives encoded byte strings via ``write``.
        dialect: CSV dialect passed through to ``csv.writer``.
        encoding: Name of the codec used for the data written to "f".
        **kwds: Extra formatting options passed through to ``csv.writer``.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Local import so this class does not depend on the Python-2-only
        # cStringIO module.
        import io

        # On Python 2 (str is bytes) the csv module emits byte strings; on
        # Python 3 it emits text. Pick a queue of the matching kind.
        self._csv_writes_bytes = str is bytes
        if self._csv_writes_bytes:
            self.queue = io.BytesIO()
        else:
            # newline="" keeps the dialect's line terminator untouched.
            self.queue = io.StringIO(newline="")
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """Format one row as CSV and write it to the stream in the target encoding.

        Args:
            row: Iterable of text values making up one CSV record.
        """
        if self._csv_writes_bytes:
            # Bytes-based csv: feed it UTF-8, then decode the formatted line
            # back to text so it can be re-encoded below.
            self.writer.writerow([s.encode("utf-8") for s in row])
            data = self.queue.getvalue().decode("utf-8")
        else:
            self.writer.writerow(row)
            data = self.queue.getvalue()
        # Re-encode into the target encoding and write to the target stream.
        self.stream.write(self.encoder.encode(data))
        # Empty the queue; the explicit seek is needed because io buffers do
        # not move the write position when truncated.
        self.queue.seek(0)
        self.queue.truncate()

    def writerows(self, rows):
        """Write every row in *rows* by calling ``writerow`` on each one."""
        for row in rows:
            self.writerow(row)
def _log_stderr(message):
    """Write one diagnostic line to standard error."""
    sys.stderr.write(message + "\n")


def _collect_form_fields(worddoc):
    """Return a list of (name, value) pairs for the form fields of *worddoc*."""
    fields = []
    # Items are addressed by position 1..Count.
    for i in range(1, worddoc.FormFields.Count + 1):
        try:
            form = worddoc.FormFields.Item(i)
            name = form.Name
            value = form.Result
        except pywintypes.com_error as e:
            # A field that cannot be read is reported and skipped.
            _log_stderr("Exception: %s" % str(e))
            continue
        fields.append((name, value))
        try:
            _log_stderr("%s: %s" % (name, value))
        except UnicodeEncodeError as e:
            # The console may be unable to display the value; the value
            # itself has already been recorded above.
            _log_stderr("Error decoding charset,%s" % e)
    return fields


def main():
    """Extract form-field values from Word documents into a CSV file.

    Command line: ``<directory> <outfile.csv>``. Every ``.doc`` / ``.docx``
    file in <directory> is opened through Word's COM automation interface,
    and one CSV row holding that document's form-field values is written to
    <outfile.csv>. Progress and errors are reported on stderr.

    Exits with status -1 after printing usage when fewer than two arguments
    are given.
    """
    if len(sys.argv) < 3:
        sys.stdout.write("Usage: %s <directory> <outfile.csv>\n" % sys.argv[0])
        sys.stdout.write("Where <directory> - directory containing word docs with forms\n")
        sys.stdout.write("and <outfile.csv> - file where to put results\n")
        sys.exit(-1)

    directory = os.path.abspath(sys.argv[1])
    wordapp = win32com.client.Dispatch("Word.Application")
    try:
        wordapp.Visible = 0  # Hide word app
        results = []
        for docfile in os.listdir(directory):
            if not docfile.endswith((".doc", ".docx")):
                continue
            _log_stderr("Processing %s" % docfile)
            worddoc = wordapp.Documents.Open(os.path.join(directory, docfile))
            try:
                results.append(_collect_form_fields(worddoc))
            finally:
                # Always release the document, even if reading it failed.
                # 0 is Word's wdDoNotSaveChanges, so the hidden instance
                # cannot block on a "save changes?" prompt.
                worddoc.Close(0)

        # "wb": UnicodeWriter writes already-encoded bytes to the stream.
        with open(sys.argv[2], "wb") as csvfile:
            csvwriter = UnicodeWriter(csvfile, quoting=csv.QUOTE_ALL)
            for docres in results:
                # Only the values are exported; field names are dropped.
                csvwriter.writerow([value for (_name, value) in docres])
    finally:
        # Never leave an invisible Word process running behind the script.
        wordapp.Quit()
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
将其转换为 PowerShell 很简单。如果有人确实需要,请给我发邮件。