

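# ========== Preamble sketch: imports + helpers this section assumes ==========
# NOTE: the methods below call self.db_query() and self.db_execute(), and use a
# DB_FILE constant, none of which appear in this section. The imports are the
# ones the code actually needs; DB_FILE's value and the two helper bodies are
# hypothetical minimal sketches inferred from usage (db_query must return rows
# supporting key access like row['nav_date'], which sqlite3.Row provides, and
# db_execute is assumed to commit immediately). In the full program these live
# on the same application class as the methods below.
import sqlite3
import time
from datetime import datetime, timedelta
from tkinter import messagebox

import pandas as pd
import requests
from bs4 import BeautifulSoup

DB_FILE = "fund_data.db"  # assumed file name; defined once in the full program

def db_query(self, sql, params=()):
    """Run a SELECT and return all rows as sqlite3.Row objects."""
    conn = sqlite3.connect(DB_FILE)
    conn.row_factory = sqlite3.Row  # enables row['column'] access in callers
    try:
        return conn.execute(sql, params).fetchall()
    finally:
        conn.close()

def db_execute(self, sql, params=()):
    """Run an INSERT/UPDATE/DELETE and commit immediately."""
    conn = sqlite3.connect(DB_FILE)
    try:
        conn.execute(sql, params)
        conn.commit()
    finally:
        conn.close()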
# ========== Core: database initialization (create fund_info + fund_nav tables) ==========
def init_database(self):
    """Initialize the database: create the 2 core tables plus indexes."""
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()
    # Table 1: fund_info (basic fund info + favorite flag)
    cursor.execute('''CREATE TABLE IF NOT EXISTS fund_info (
        fund_code TEXT PRIMARY KEY,                      -- fund code, unique primary key
        fund_name TEXT NOT NULL,                         -- full fund name
        is_favorite INTEGER DEFAULT 0,                   -- favorite flag: 0 = no, 1 = yes
        first_crawl DATETIME,                            -- time of first crawl
        last_crawl DATETIME,                             -- time of most recent crawl
        crawl_status TEXT DEFAULT '未爬取',              -- crawl status: '未爬取' (not crawled) / '爬取中' (crawling) / '已完成' (done)
        update_time DATETIME DEFAULT CURRENT_TIMESTAMP   -- row update time
    )''')
    # Table 2: fund_nav (NAV history for all funds)
    cursor.execute('''CREATE TABLE IF NOT EXISTS fund_nav (
        fund_code TEXT,
        nav_date DATE,
        unit_nav REAL,                                   -- unit net asset value
        update_time DATETIME DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (fund_code, nav_date),               -- composite key: blocks duplicates
        FOREIGN KEY (fund_code) REFERENCES fund_info(fund_code)
    )''')
    # Index optimization (the main speed-up)
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_fund_code ON fund_nav(fund_code)')
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_nav_date ON fund_nav(nav_date)')
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_fund_name ON fund_info(fund_name)')  # name index for search
    conn.commit()
    conn.close()
    print("✅ Database initialized: fund_info + fund_nav tables and indexes created")

# ========== Core: read fund NAV data from the database ==========
def get_fund_data_from_db(self, fund_code):
    """Read NAV rows for one fund from fund_nav and return a DataFrame."""
    sql = '''SELECT nav_date, unit_nav
             FROM fund_nav
             WHERE fund_code=?
             ORDER BY nav_date ASC'''
    result = self.db_query(sql, (fund_code,))
    if not result:
        return pd.DataFrame()
    # Convert to a DataFrame; the Chinese column names ('净值日期' = NAV date,
    # '单位净值' = unit NAV) are kept because the existing calculation and
    # plotting code expects them
    data = [(row['nav_date'], row['unit_nav']) for row in result]
    df = pd.DataFrame(data, columns=['净值日期', '单位净值'])
    df['净值日期'] = pd.to_datetime(df['净值日期'], errors='coerce')
    df['单位净值'] = pd.to_numeric(df['单位净值'], errors='coerce')
    df = df.dropna().set_index('净值日期')
    return df

def on_day_change(self, event):
    """After the day-range selector changes, reload that range of NAV data and redraw the chart."""
    if not self.current_fund:
        messagebox.showwarning("Notice", "Please search for and select a fund first!")
        return
    fund_name, fund_code = self.current_fund
    target_days = int(self.day_var.get())
    self.loading_label.config(text=f"Loading last {target_days} days of NAV data for {fund_name}...")
    self.root.update()
    # Read the requested range from the database; crawl if nothing is stored yet
    fund_df = self.get_fund_data_from_db(fund_code)
    if fund_df.empty:
        fund_df = self.crawl_fund_nav(fund_code, fund_name, target_days=target_days)
    else:
        # Filter down to the requested number of days
        fund_df_sorted = fund_df.sort_index(ascending=True)
        latest_date = fund_df_sorted.index[-1]
        start_date = latest_date - timedelta(days=target_days)
        fund_df = fund_df_sorted[fund_df_sorted.index >= start_date]
    if fund_df.empty:
        messagebox.showwarning("Notice", f"No NAV data found for {fund_name} in the last {target_days} days!")
        self.loading_label.config(text="")
        return
    self.draw_chart(fund_df, fund_name, fund_code)
    self.loading_label.config(text="Data loaded!")
    self.root.after(3000, lambda: self.loading_label.config(text=""))

# ========== Core: crawl NAV data + store it in the database ==========
def crawl_fund_nav(self, fund_code, fund_name, target_days=None):
    """Crawl a fund's NAV history into fund_nav, update fund_info, and support
    incremental crawling. Defaults to the day range selected in the UI."""
    if target_days is None:
        target_days = int(self.day_var.get())
    # 1. Update fund_info first (insert on first crawl, otherwise refresh status)
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    fund_exist = self.db_query('SELECT 1 FROM fund_info WHERE fund_code=?', (fund_code,))
    if not fund_exist:
        self.db_execute(
            'INSERT INTO fund_info (fund_code, fund_name, first_crawl, crawl_status) VALUES (?, ?, ?, ?)',
            (fund_code, fund_name, now, '爬取中'))
    else:
        self.db_execute(
            'UPDATE fund_info SET crawl_status=?, last_crawl=? WHERE fund_code=?',
            ('爬取中', now, fund_code))
    # 2. Core crawl logic (incremental, stops as soon as enough data is collected)
    def _get_html(page=1):
        # per=100 rows per page keeps the number of page requests low
        url = (f"http://fund.eastmoney.com/f10/F10DataApi.aspx"
               f"?type=lsjz&code={fund_code}&page={page}&per=100")
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/128.0.0.0 Safari/537.36"}
        try:
            res = requests.get(url, headers=headers, timeout=8)
            res.raise_for_status()
            res.encoding = "utf-8"
            return res.text
        except Exception as e:
            print(f"Failed to fetch page {page}: {e}")
            return ""

    # 3. Crawl page by page + store incrementally
    collected_count = 0  # number of rows collected so far
    page = 1
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()
    while True:
        html = _get_html(page)
        if not html:
            break
        soup = BeautifulSoup(html, 'html.parser')
        tbody = soup.find("tbody")
        if not tbody:
            break
        # Parse one page of rows
        rows = tbody.find_all("tr")
        if not rows:
            break
        for tr in rows:
            tds = tr.find_all("td")
            if len(tds) < 2:
                continue
            nav_date = tds[0].get_text().strip()  # NAV date
            unit_nav = tds[1].get_text().strip()  # unit NAV
            if not nav_date or not unit_nav.replace('.', '').isdigit():
                continue
            # Incremental storage: the composite primary key already blocks
            # duplicates, so INSERT OR IGNORE is all that is needed
            cursor.execute('''INSERT OR IGNORE INTO fund_nav (fund_code, nav_date, unit_nav)
                              VALUES (?, ?, ?)''', (fund_code, nav_date, float(unit_nav)))
            collected_count += 1
        # Stop on demand: once the target day count is reached (one row per
        # trading day), there is no need to crawl the full history
        if collected_count >= target_days:
            break
        page += 1
        time.sleep(0.5)  # throttle to avoid anti-crawling measures

    # 4. Update the crawl status
    cursor.execute('UPDATE fund_info SET crawl_status=?, last_crawl=? WHERE fund_code=?',
                   ('已完成', now, fund_code))
    conn.commit()
    conn.close()
    print(f"✅ {fund_code} crawl finished, {collected_count} NAV rows stored")
    # 5. Return the freshly stored data from the database
    return self.get_fund_data_from_db(fund_code)
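# ========== Sketch: why INSERT OR IGNORE makes re-crawls incremental ==========
# NOTE: standalone demo, not part of the original program. It reuses the
# fund_nav schema above on an in-memory database: the composite primary key
# (fund_code, nav_date) rejects duplicate dates, so re-running a crawl only
# inserts rows that were not stored before. The fund code is a placeholder.
demo = sqlite3.connect(":memory:")
demo.execute('''CREATE TABLE fund_nav (
    fund_code TEXT, nav_date DATE, unit_nav REAL,
    PRIMARY KEY (fund_code, nav_date))''')
rows = [("000001", "2024-06-03", 1.234),
        ("000001", "2024-06-04", 1.241),
        ("000001", "2024-06-03", 9.999)]  # duplicate date: silently ignored
for r in rows:
    demo.execute("INSERT OR IGNORE INTO fund_nav VALUES (?, ?, ?)", r)
print(demo.execute("SELECT COUNT(*) FROM fund_nav").fetchone()[0])  # prints 2
demo.close()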