

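# ========== Preamble sketch: imports + helpers this section assumes ==========
# NOTE: the methods below call self.db_query() and self.db_execute(), and use a
# DB_FILE constant, none of which appear in this section. The imports are the
# ones the code actually needs; DB_FILE's value and the two helper bodies are
# hypothetical minimal sketches inferred from usage (db_query must return rows
# supporting key access like row['nav_date'], which sqlite3.Row provides, and
# db_execute is assumed to commit immediately). In the full program these live
# on the same application class as the methods below.
import sqlite3
import time
from datetime import datetime, timedelta
from tkinter import messagebox

import pandas as pd
import requests
from bs4 import BeautifulSoup

DB_FILE = "fund_data.db"  # assumed file name; defined once in the full program

def db_query(self, sql, params=()):
    """Run a SELECT and return all rows as sqlite3.Row objects."""
    conn = sqlite3.connect(DB_FILE)
    conn.row_factory = sqlite3.Row  # enables row['column'] access in callers
    try:
        return conn.execute(sql, params).fetchall()
    finally:
        conn.close()

def db_execute(self, sql, params=()):
    """Run an INSERT/UPDATE/DELETE and commit immediately."""
    conn = sqlite3.connect(DB_FILE)
    try:
        conn.execute(sql, params)
        conn.commit()
    finally:
        conn.close()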
# ========== Core: database initialization (create fund_info + fund_nav tables) ==========
def init_database(self):
    """Initialize the database: create the 2 core tables plus indexes."""
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()
    # Table 1: fund_info (basic fund info + favorite flag)
    cursor.execute('''CREATE TABLE IF NOT EXISTS fund_info (
        fund_code TEXT PRIMARY KEY,                      -- fund code, unique primary key
        fund_name TEXT NOT NULL,                         -- full fund name
        is_favorite INTEGER DEFAULT 0,                   -- favorite flag: 0 = no, 1 = yes
        first_crawl DATETIME,                            -- time of first crawl
        last_crawl DATETIME,                             -- time of most recent crawl
        crawl_status TEXT DEFAULT '未爬取',              -- crawl status: '未爬取' (not crawled) / '爬取中' (crawling) / '已完成' (done)
        update_time DATETIME DEFAULT CURRENT_TIMESTAMP   -- row update time
    )''')
    # Table 2: fund_nav (NAV history for all funds)
    cursor.execute('''CREATE TABLE IF NOT EXISTS fund_nav (
        fund_code TEXT,
        nav_date DATE,
        unit_nav REAL,                                   -- unit net asset value
        update_time DATETIME DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (fund_code, nav_date),               -- composite key: blocks duplicates
        FOREIGN KEY (fund_code) REFERENCES fund_info(fund_code)
    )''')
    # Index optimization (the main speed-up)
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_fund_code ON fund_nav(fund_code)')
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_nav_date ON fund_nav(nav_date)')
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_fund_name ON fund_info(fund_name)')  # name index for search
    conn.commit()
    conn.close()
    print("✅ Database initialized: fund_info + fund_nav tables and indexes created")

# ========== Core: read fund NAV data from the database ==========
def get_fund_data_from_db(self, fund_code):
    """Read NAV rows for one fund from fund_nav and return a DataFrame."""
    sql = '''SELECT nav_date, unit_nav
             FROM fund_nav
             WHERE fund_code=?
             ORDER BY nav_date ASC'''
    result = self.db_query(sql, (fund_code,))
    if not result:
        return pd.DataFrame()
    # Convert to a DataFrame; the Chinese column names ('净值日期' = NAV date,
    # '单位净值' = unit NAV) are kept because the existing calculation and
    # plotting code expects them
    data = [(row['nav_date'], row['unit_nav']) for row in result]
    df = pd.DataFrame(data, columns=['净值日期', '单位净值'])
    df['净值日期'] = pd.to_datetime(df['净值日期'], errors='coerce')
    df['单位净值'] = pd.to_numeric(df['单位净值'], errors='coerce')
    df = df.dropna().set_index('净值日期')
    return df

def on_day_change(self, event):
    """After the day-range selector changes, reload that range of NAV data and redraw the chart."""
    if not self.current_fund:
        messagebox.showwarning("Notice", "Please search for and select a fund first!")
        return
    fund_name, fund_code = self.current_fund
    target_days = int(self.day_var.get())
    self.loading_label.config(text=f"Loading last {target_days} days of NAV data for {fund_name}...")
    self.root.update()
    # Read the requested range from the database; crawl if nothing is stored yet
    fund_df = self.get_fund_data_from_db(fund_code)
    if fund_df.empty:
        fund_df = self.crawl_fund_nav(fund_code, fund_name, target_days=target_days)
    else:
        # Filter down to the requested number of days
        fund_df_sorted = fund_df.sort_index(ascending=True)
        latest_date = fund_df_sorted.index[-1]
        start_date = latest_date - timedelta(days=target_days)
        fund_df = fund_df_sorted[fund_df_sorted.index >= start_date]
    if fund_df.empty:
        messagebox.showwarning("Notice", f"No NAV data found for {fund_name} in the last {target_days} days!")
        self.loading_label.config(text="")
        return
    self.draw_chart(fund_df, fund_name, fund_code)
    self.loading_label.config(text="Data loaded!")
    self.root.after(3000, lambda: self.loading_label.config(text=""))

# ========== Core: crawl NAV data + store it in the database ==========
def crawl_fund_nav(self, fund_code, fund_name, target_days=None):
    """Crawl a fund's NAV history into fund_nav, update fund_info, and support
    incremental crawling. Defaults to the day range selected in the UI."""
    if target_days is None:
        target_days = int(self.day_var.get())
    # 1. Update fund_info first (insert on first crawl, otherwise refresh status)
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    fund_exist = self.db_query('SELECT 1 FROM fund_info WHERE fund_code=?', (fund_code,))
    if not fund_exist:
        self.db_execute(
            'INSERT INTO fund_info (fund_code, fund_name, first_crawl, crawl_status) VALUES (?, ?, ?, ?)',
            (fund_code, fund_name, now, '爬取中'))
    else:
        self.db_execute(
            'UPDATE fund_info SET crawl_status=?, last_crawl=? WHERE fund_code=?',
            ('爬取中', now, fund_code))
    # 2. Core crawl logic (incremental, stops as soon as enough data is collected)
    def _get_html(page=1):
        # per=100 rows per page keeps the number of page requests low
        url = (f"http://fund.eastmoney.com/f10/F10DataApi.aspx"
               f"?type=lsjz&code={fund_code}&page={page}&per=100")
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/128.0.0.0 Safari/537.36"}
        try:
            res = requests.get(url, headers=headers, timeout=8)
            res.raise_for_status()
            res.encoding = "utf-8"
            return res.text
        except Exception as e:
            print(f"Failed to fetch page {page}: {e}")
            return ""

    # 3. Crawl page by page + store incrementally
    collected_count = 0  # number of rows collected so far
    page = 1
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()
    while True:
        html = _get_html(page)
        if not html:
            break
        soup = BeautifulSoup(html, 'html.parser')
        tbody = soup.find("tbody")
        if not tbody:
            break
        # Parse one page of rows
        rows = tbody.find_all("tr")
        if not rows:
            break
        for tr in rows:
            tds = tr.find_all("td")
            if len(tds) < 2:
                continue
            nav_date = tds[0].get_text().strip()  # NAV date
            unit_nav = tds[1].get_text().strip()  # unit NAV
            if not nav_date or not unit_nav.replace('.', '').isdigit():
                continue
            # Incremental storage: the composite primary key already blocks
            # duplicates, so INSERT OR IGNORE is all that is needed
            cursor.execute('''INSERT OR IGNORE INTO fund_nav (fund_code, nav_date, unit_nav)
                              VALUES (?, ?, ?)''', (fund_code, nav_date, float(unit_nav)))
            collected_count += 1
        # Stop on demand: once the target day count is reached (one row per
        # trading day), there is no need to crawl the full history
        if collected_count >= target_days:
            break
        page += 1
        time.sleep(0.5)  # throttle to avoid anti-crawling measures

    # 4. Update the crawl status
    cursor.execute('UPDATE fund_info SET crawl_status=?, last_crawl=? WHERE fund_code=?',
                   ('已完成', now, fund_code))
    conn.commit()
    conn.close()
    print(f"✅ {fund_code} crawl finished, {collected_count} NAV rows stored")
    # 5. Return the freshly stored data from the database
    return self.get_fund_data_from_db(fund_code)
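# ========== Sketch: why INSERT OR IGNORE makes re-crawls incremental ==========
# NOTE: standalone demo, not part of the original program. It reuses the
# fund_nav schema above on an in-memory database: the composite primary key
# (fund_code, nav_date) rejects duplicate dates, so re-running a crawl only
# inserts rows that were not stored before. The fund code is a placeholder.
demo = sqlite3.connect(":memory:")
demo.execute('''CREATE TABLE fund_nav (
    fund_code TEXT, nav_date DATE, unit_nav REAL,
    PRIMARY KEY (fund_code, nav_date))''')
rows = [("000001", "2024-06-03", 1.234),
        ("000001", "2024-06-04", 1.241),
        ("000001", "2024-06-03", 9.999)]  # duplicate date: silently ignored
for r in rows:
    demo.execute("INSERT OR IGNORE INTO fund_nav VALUES (?, ?, ?)", r)
print(demo.execute("SELECT COUNT(*) FROM fund_nav").fetchone()[0])  # prints 2
demo.close()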