

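pip install akshare requests pandas matplotlib beautifulsoup4

(注:tkinter 随 Python 标准库自带,无需也无法通过 pip 安装;解析净值页面用到的 BeautifulSoup 需要安装 beautifulsoup4。)

要获取基金的净值数据,必须用基金代码精确匹配。一开始我想的是直接输入代码查询,但这样很麻烦:很多人并不知道自己的基金代码是多少,只知道大概是哪方面的基金,因此必须实现中文检索。但直接在线按名称查询很难精确匹配,而且每次都去获取名称很慢。所以我写了一段代码,先爬取全量基金的名称及代码,存入本地 JSON 文件;程序直接在本地查询,匹配到代码后再去爬取该代码对应的净值数据。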
这里又出现了另一个坑:直接从东财爬取到的基金名称是拼音简写,而不是中文名称。最后改用 akshare 才解决。
爬取全量基金中文名称代码如下:
def crawl_full_fund_database() -> List[Dict]:
    """Fetch the code and Chinese name of every fund exposed by AKShare.

    Calls ``ak.fund_name_em()`` and reads its "基金代码" (fund code) and
    "基金简称" (fund short name) columns. A row is kept only when the
    stripped code is exactly 6 characters long and the stripped name is
    non-empty and not purely digits. Progress and a five-row sample are
    printed to stdout.

    Returns:
        A list of ``{"code": "000001", "name": "华夏成长混合A"}`` dicts,
        de-duplicated by code. An empty list is returned when the interface
        yields no rows or when any exception is raised while crawling.
    """
    print("开始爬取全量基金中文全称+代码(约28000+只),请稍候...")
    print("=" * 50)
    try:
        # 1. Pull the complete fund table; time only the remote call.
        start_time = time.time()
        fund_df = ak.fund_name_em()
        end_time = time.time()
        if fund_df.empty:
            print("❌ 爬取失败:接口返回空数据(可能网络问题,重试即可)")
            return []

        # 2. Clean and de-duplicate in one pass. Iterating the two columns
        #    directly avoids building a Series per row as iterrows() does.
        #    Keying by code keeps the position of the first occurrence and
        #    the contents of the last one for a repeated code.
        fund_dict = {}
        for raw_code, raw_name in zip(fund_df["基金代码"], fund_df["基金简称"]):
            fund_code = str(raw_code).strip()
            fund_name = str(raw_name).strip()
            if len(fund_code) == 6 and fund_name and not fund_name.isdigit():
                fund_dict[fund_code] = {"code": fund_code, "name": fund_name}
        final_funds = list(fund_dict.values())

        # 3. Report the result together with a short sample.
        total_count = len(final_funds)
        print(f"✅ 爬取成功!共获取 {total_count} 只基金(耗时{end_time - start_time:.2f}秒)")
        print("📌 示例数据(前5条):")
        for fund in final_funds[:5]:
            print(f" {fund['code']} → {fund['name']}")
        print("=" * 50)
        return final_funds
    except Exception as e:
        # Script-level boundary: report the failure and hand back an empty
        # result rather than aborting the caller.
        print(f"❌ 爬取异常:{str(e)}")
        return []


def save_to_json(fund_list: List[Dict], filename: str = "fund_database_full.json") -> None:
    """Write the fund list to a UTF-8 encoded JSON file.

    Nothing is written when ``fund_list`` is empty. Failures while opening
    or serializing are printed to stdout instead of being raised.

    Args:
        fund_list: Fund records to store.
        filename: Destination path; defaults to ``fund_database_full.json``.
    """
    if not fund_list:
        print("⚠️ 无数据可保存")
        return
    try:
        with open(filename, "w", encoding="utf-8") as f:
            # ensure_ascii=False stores the Chinese names as readable text
            # instead of \uXXXX escapes.
            json.dump(fund_list, f, ensure_ascii=False, indent=4)
        print(f"✅ 已保存到 {filename}(UTF-8编码,共{len(fund_list)}条数据)")
    except Exception as e:
        print(f"❌ 保存失败:{str(e)}")
爬取到的数据会存入本地的 fund_database_full.json。不会爬取的新手可以直接使用我已经爬取好的文件,在后台留言即可获取。
def get_fund_data(self, fund_code):
    """Download the historical net values of one fund from Eastmoney.

    Requests the ``F10DataApi.aspx?type=lsjz`` endpoint 20 records per page,
    reads the page count from the first response, parses the ``<tbody>``
    rows of every page and assembles them into a DataFrame.

    Args:
        fund_code: Fund code interpolated into the request URL.

    Returns:
        A DataFrame indexed by "净值日期" (sorted ascending) holding the last
        60 rows with a valid date and "单位净值"; an empty DataFrame when the
        first request fails, no rows are parsed, or the expected columns are
        missing.
    """

    def _get_html(page=1):
        # Fetch one result page; returns "" (after scheduling an error
        # dialog) when the request fails.
        url = f"http://fund.eastmoney.com/f10/F10DataApi.aspx?type=lsjz&code={fund_code}&page={page}&per=20"
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36"}
        try:
            res = requests.get(url, headers=headers, timeout=10)
            res.raise_for_status()
            res.encoding = "utf-8"
            return res.text
        except Exception as e:
            # Build the message now: Python unbinds `e` when this clause
            # exits, so a callback that still referenced it would raise
            # NameError once Tk finally runs it.
            error_text = f"爬取净值失败:{str(e)}"
            self.root.after(0, lambda: messagebox.showerror("错误", error_text))
            return ""

    self.root.after(0, lambda: self.loading_label.config(text="正在获取净值数据,请稍候..."))
    self.root.update()

    first_html = _get_html()
    if not first_html:
        self.loading_label.config(text="")
        return pd.DataFrame()

    # Capture digits only so int() cannot fail on trailing text.
    page_match = re.search(r'pages:(\d+)', first_html)
    total_page = int(page_match.group(1)) if page_match else 1

    soup = BeautifulSoup(first_html, 'html.parser')
    heads = [th.get_text().strip() for th in soup.find_all("th") if th.get_text()] or ['净值日期', '单位净值']

    # NOTE(review): every page is downloaded although only the last 60 rows
    # are returned; if the endpoint lists newest records first, fetching the
    # first few pages would be enough - confirm before changing.
    records = []
    for page in range(1, total_page + 1):
        # Page 1 is already in hand; do not request it a second time.
        page_html = first_html if page == 1 else _get_html(page)
        if not page_html:
            continue
        tbody = BeautifulSoup(page_html, 'html.parser').find("tbody")
        if not tbody:
            continue
        for tr in tbody.find_all("tr"):
            row = [td.get_text().strip() for td in tr.find_all("td")]
            if len(row) >= len(heads):
                records.append(row[:len(heads)])

    if not records:
        self.loading_label.config(text="")
        return pd.DataFrame()

    df = pd.DataFrame(records, columns=heads)
    if '净值日期' not in df.columns or '单位净值' not in df.columns:
        # The scraped header lacks the columns used below; report "no data"
        # instead of raising KeyError.
        self.loading_label.config(text="")
        return pd.DataFrame()

    df['净值日期'] = pd.to_datetime(df['净值日期'], errors='coerce')
    df['单位净值'] = pd.to_numeric(df['单位净值'], errors='coerce')
    df = df.dropna(subset=['净值日期', '单位净值']).sort_values('净值日期').set_index('净值日期')

    self.root.after(0, lambda: self.loading_label.config(text="数据获取完成!"))
    # Clear the status text three seconds later.
    self.root.after(3000, lambda: self.loading_label.config(text=""))
    return df.tail(60)


# Draw the net-value chart
def draw_chart(self, fund_df, fund_name, fund_code):
    """Render the "单位净值" series of ``fund_df`` inside ``self.chart_frame``.

    Existing child widgets of the chart frame are destroyed first, the
    fund-name label is updated, and a new Matplotlib figure is embedded
    through ``FigureCanvasTkAgg``.

    Args:
        fund_df: DataFrame containing a "单位净值" column.
        fund_name: Fund name shown in the label and the chart title.
        fund_code: Fund code shown in the label.
    """
    # Remove whatever chart was displayed before.
    for widget in self.chart_frame.winfo_children():
        widget.destroy()

    self.fund_name_label.config(text=f"基金名称:{fund_name}(代码:{fund_code})")

    fig = plt.Figure(figsize=(10, 5), dpi=100)
    ax = fig.add_subplot(111)
    fund_df['单位净值'].plot(ax=ax, color='#e63946', linewidth=2)
    ax.set_title(f'{fund_name} 近60天净值走势', fontsize=14, pad=10)
    ax.set_xlabel('日期', fontsize=12)
    ax.set_ylabel('单位净值', fontsize=12)
    ax.grid(True, linestyle='--', alpha=0.7)

    canvas = FigureCanvasTkAgg(fig, master=self.chart_frame)
    canvas.draw()
    canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)


# Main query flow
def query_fund(self):
    """Run one query: search, let the user pick a fund, fetch data, plot.

    Reads the keyword or code from ``self.fund_input_entry``. The query
    button is disabled and the cursor switched to "wait" while the query
    runs; both are restored in ``finally`` on every exit path.
    """
    input_str = self.fund_input_entry.get().strip()
    if not input_str:
        messagebox.showwarning("提示", "请输入基金关键词或代码!")
        return

    self.query_btn.config(state="disabled")
    self.root.config(cursor="wait")
    try:
        # 1. Search for matching funds.
        matched_funds = self._search_fund(input_str)
        if not matched_funds:
            messagebox.showwarning("提示", f"未找到「{input_str}」匹配的基金!")
            return

        # 2. Selection dialog (single click selects, double click confirms).
        selected = self._select_fund_dialog(matched_funds)
        if not selected:  # the user cancelled the selection
            return
        fund_name, fund_code = selected

        # 3. Fetch the net-value data.
        fund_df = self.get_fund_data(fund_code)
        if fund_df.empty:
            messagebox.showwarning("提示", f"未获取到{fund_name}({fund_code})的净值数据!")
            self.fund_name_label.config(text="")
            return

        # 4. Draw the chart.
        self.draw_chart(fund_df, fund_name, fund_code)
    finally:
        self.query_btn.config(state="normal")
        self.root.config(cursor="arrow")