# pandas read excel file error
# 报错如下
>>> df = pd.read_excel(s)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 478, in read_excel
io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 1513, in __init__
self._reader = self._engines[engine](self._io, storage_options=storage_options)
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py", line 549, in __init__
super().__init__(filepath_or_buffer, storage_options=storage_options)
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 540, in __init__
self.book = self.load_workbook(self.handles.handle)
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py", line 560, in load_workbook
return load_workbook(
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/openpyxl/reader/excel.py", line 346, in load_workbook
reader.read()
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/openpyxl/reader/excel.py", line 289, in read
self.read_strings()
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/openpyxl/reader/excel.py", line 147, in read_strings
self.shared_strings = reader(src)
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/openpyxl/reader/strings.py", line 16, in read_string_table
for _, node in iterparse(xml_source):
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/defusedxml/common.py", line 105, in iterparse
parser = DefusedXMLParser(target=_TreeBuilder(),
File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/defusedxml/ElementTree.py", line 68, in __init__
_XMLParser.__init__(self, html, target, encoding)
TypeError: __init__() takes 1 positional argument but 4 were given
# 解决办法
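根据上面的提示，报错发生在 defusedxml 包构造 `_XMLParser` 类对象时：defusedxml 0.5.0 仍以位置参数的方式向 `_XMLParser.__init__` 传入 `html`、`target`、`encoding`，而从 Python 3.8 开始，标准库 `xml.etree.ElementTree.XMLParser` 的构造函数去掉了 `html` 参数，其余参数也改为仅限关键字参数，除 `self` 以外不再接受任何位置参数，所以抛出了 `TypeError`。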
查看当前安装的 defusedxml 包的信息如下：
pip show defusedxml
Name: defusedxml
Version: 0.5.0
Summary: XML bomb protection for Python stdlib modules
Home-page: https://github.com/tiran/defusedxml
Author: Christian Heimes
Author-email: christian@python.org
License: PSFL
Location: /xx/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages
Requires:
Required-by: nbconvert
当前安装的 0.5.0 版本与 Python 3.9 不兼容，解决方法是更新 defusedxml 包，如下：
pip install defusedxml==0.7.1
更新完成后就可以使用 pandas 正常解析 Excel 文件了：
import pandas as pd
s = "/xx/Desktop/000300perf.xlsx"
df = pd.read_excel(s)
df.head(5)
# 日期Date 指数代码Index Code ... 成交金额(亿元)Turnover 样本数量ConsNumber
# 0 20230813 300 ... 1976.24 300
# 1 20230814 300 ... 1976.24 300
# 2 20230815 300 ... 1774.37 300