Python 解析 PDF 转换为csv


相比其他几个 PDF 解析库,pdfplumber 的解析结果更容易理解:它以 list 的形式返回数据,还可以把 PDF 中的表格提取出来并输出到控制台(console)。
# -*- coding: utf-8 -*-
import csv
import datetime
import os
import sys

import pdfplumber


def expand_skus(sku_cell, qty_cell):
    """Expand one order's SKU/quantity cells into a flat list of SKUs.

    Args:
        sku_cell: Text of the SKU cell. For a multi-SKU order the SKUs are
            separated by newlines, e.g. ``'WF-DC-TAUPE-T\\nWF-DC-GREY-T'``.
        qty_cell: Text of the quantity cell. Either a single run of digits
            (single-SKU order) or newline-separated quantities, one per SKU
            line, e.g. ``'2\\n1'``.

    Returns:
        A list in which every SKU is repeated as many times as its quantity.

    Raises:
        ValueError: If the SKU and quantity cells have a different number of
            lines, or a quantity is not an integer.
    """
    if qty_cell.isdigit():
        # Single-SKU order: the whole SKU cell is treated as one SKU.
        return [sku_cell] * int(qty_cell)

    # Multi-SKU order: multi-line cell text is joined with '\n'.
    skus = sku_cell.split('\n')
    qtys = qty_cell.split('\n')
    if len(skus) != len(qtys):
        raise ValueError(
            'SKU/qty line count mismatch: {!r} vs {!r}'.format(skus, qtys))

    expanded = []
    for sku, qty in zip(skus, qtys):
        # Repeat by the numeric quantity, not by the number of characters
        # in the quantity string.
        expanded.extend([sku] * int(qty))
    return expanded


def write_sku_csv(path, skus):
    """Write *skus* to *path* as a one-column CSV with an ``SKU`` header.

    Rows are terminated with ``\\r\\n`` (the csv module's default), and values
    containing commas, quotes or newlines are quoted.
    """
    # newline='' lets the csv module control line endings itself.
    with open(path, "w", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['SKU'])
        writer.writerows([sku] for sku in skus)


def main():
    """Prompt for a PDF, collect SKUs from its tables, write ``result.csv``.

    Only every second table (counted across all pages) is processed; in
    those tables row 1 is read as ``[sku_cell, qty_cell, ...]``.
    NOTE(review): presumably the odd-numbered tables are headers or address
    blocks in the source PDF layout - confirm against a sample file.
    """
    # Interactive entry point; any failure is reported instead of crashing.
    try:
        pdf_name = input("输入PDF文件名:")
        data = []
        with pdfplumber.open(pdf_name) as pdf:
            print(len(pdf.pages))
            table_no = 0
            for page in pdf.pages:
                for table in page.extract_tables():
                    table_no += 1
                    if table_no % 2:
                        continue
                    sku_cell, qty_cell = table[1][0], table[1][1]
                    print(table_no, qty_cell)
                    # A purely numeric qty cell means a single-SKU order.
                    if qty_cell.isdigit():
                        print('单sku和多数量的订单', sku_cell, qty_cell)
                    else:
                        print('多个sku多数量的订单', sku_cell, qty_cell)
                        # e.g. ['WF-DC-TAUPE-T', 'WF-DC-GREY-T'] ['2', '1']
                        print(sku_cell.split('\n'), qty_cell.split('\n'))
                    data.extend(expand_skus(sku_cell, qty_cell))
        print(data)
        for sku in data:
            print(sku)
        write_sku_csv("result.csv", data)
    except Exception as err:
        # str() is required: concatenating the exception object itself
        # raises TypeError and hides the real error.
        print('pdf文件错误:' + str(err))
        # 'pause' is a Windows shell built-in; keep the console open there.
        if os.name == 'nt':
            os.system('pause')


if __name__ == "__main__":
    main()


发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注