# pandas `read_excel` 读取 Excel 文件报错(defusedxml 引发的 TypeError)

# 报错如下

>>> df = pd.read_excel(s)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 478, in read_excel
    io = ExcelFile(io, storage_options=storage_options, engine=engine)
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 1513, in __init__
    self._reader = self._engines[engine](self._io, storage_options=storage_options)
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py", line 549, in __init__
    super().__init__(filepath_or_buffer, storage_options=storage_options)
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 540, in __init__
    self.book = self.load_workbook(self.handles.handle)
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py", line 560, in load_workbook
    return load_workbook(
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/openpyxl/reader/excel.py", line 346, in load_workbook
    reader.read()
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/openpyxl/reader/excel.py", line 289, in read
    self.read_strings()
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/openpyxl/reader/excel.py", line 147, in read_strings
    self.shared_strings = reader(src)
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/openpyxl/reader/strings.py", line 16, in read_string_table
    for _, node in iterparse(xml_source):
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/defusedxml/common.py", line 105, in iterparse
    parser = DefusedXMLParser(target=_TreeBuilder(),
  File "/mnt/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages/defusedxml/ElementTree.py", line 68, in __init__
    _XMLParser.__init__(self, html, target, encoding)
TypeError: __init__() takes 1 positional argument but 4 were given

# 解决办法

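根据上面的报错信息,问题出在 defusedxml 包:它在 `DefusedXMLParser.__init__` 中以位置参数的方式调用 `_XMLParser.__init__(self, html, target, encoding)`。而从 Python 3.8 开始,标准库 `xml.etree.ElementTree.XMLParser` 的构造函数不再支持 `html` 参数,其余参数也只能以关键字形式传入(除 `self` 外不接受任何位置参数),所以旧版本的 defusedxml 在 Python 3.9 环境下会抛出上面的 TypeError。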

查看当前安装的defusedxml包的信息如下:

pip show defusedxml
Name: defusedxml
Version: 0.5.0
Summary: XML bomb protection for Python stdlib modules
Home-page: https://github.com/tiran/defusedxml
Author: Christian Heimes
Author-email: christian@python.org
License: PSFL
Location: /xx/data/sw/anaconda3/envs/detectron/lib/python3.9/site-packages
Requires: 
Required-by: nbconvert

解决方法是更新defusedxml包,如下:

pip install defusedxml==0.7.1

更新完成就可以使用pandas正常解析excel文件了:

import pandas as pd
s = "/xx/Desktop/000300perf.xlsx"
df = pd.read_excel(s)
df.head(5)
#      日期Date  指数代码Index Code  ... 成交金额(亿元)Turnover 样本数量ConsNumber
# 0  20230813             300  ...          1976.24            300
# 1  20230814             300  ...          1976.24            300
# 2  20230815             300  ...          1774.37            300

# reference

1. https://stackoverflow.com/questions/64725684/pandas-read-excel-raises-typeerror-init-takes-1-positional-argument-but-4