"""Scrape Bing (cn.bing.com) search results and print them as JSON.

Each result carries a running serial number, the result title, and the
real destination URL (Bing's click-tracking redirect is unwrapped).
"""

import base64
import json
import time
import warnings
from urllib.parse import parse_qs, urlparse

import requests
from bs4 import BeautifulSoup

# Silence every warning, including urllib3's InsecureRequestWarning that the
# verify=False requests below would otherwise emit on each call.
warnings.filterwarnings('ignore')
requests.packages.urllib3.disable_warnings(
    requests.packages.urllib3.exceptions.InsecureRequestWarning
)

_BING_SEARCH_URL = 'https://cn.bing.com/search'
_RESULTS_PER_PAGE = 10      # step of Bing's `first` offset parameter
_PAGE_DELAY_SECONDS = 2     # politeness delay between result pages
_SEARCH_TIMEOUT = 8
_REDIRECT_TIMEOUT = 5
# Prefix Bing puts in front of the base64 payload of its `u` parameter.
_BING_PAYLOAD_PREFIX = 'a1'


def _redirect_payload(url):
    """Return the raw `u` query parameter of *url*, or '' if there is none."""
    try:
        query = urlparse(url).query
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. broken IPv6 literals).
        return ''
    return parse_qs(query).get('u', [''])[0]


def _decode_redirect_payload(payload):
    """Decode a Bing `u` payload to an http(s) URL; return None on failure."""
    candidates = []
    if payload.startswith(_BING_PAYLOAD_PREFIX):
        candidates.append(payload[len(_BING_PAYLOAD_PREFIX):])
    # Also try the value untouched in case it carries no prefix.
    candidates.append(payload)

    for candidate in candidates:
        # Pad to a multiple of four; adds nothing when already aligned.
        padded = candidate + '=' * (-len(candidate) % 4)
        try:
            decoded = base64.urlsafe_b64decode(padded).decode('utf-8')
        except ValueError:
            # Covers binascii.Error and UnicodeDecodeError alike.
            continue
        if decoded.startswith(('http://', 'https://')):
            return decoded
    return None


def _resolve_real_url(redirect_url, session):
    """Resolve a result link to its destination URL.

    Links without a `u` parameter are returned unchanged. Otherwise the
    payload is decoded locally; only if that fails is the redirect followed
    with a HEAD request. On any failure the original link is returned.
    """
    payload = _redirect_payload(redirect_url)
    if not payload:
        return redirect_url

    decoded = _decode_redirect_payload(payload)
    if decoded is not None:
        return decoded

    try:
        resp = session.head(
            redirect_url,
            timeout=_REDIRECT_TIMEOUT,
            allow_redirects=True,
            verify=False,  # NOTE(security): TLS verification is disabled.
        )
    except (requests.RequestException, ValueError):
        return redirect_url
    return resp.url


def crawl_bing(keyword, pages=2):
    """Scrape Bing search results (title and real URL only).

    :param keyword: search keyword
    :param pages: number of result pages to fetch, default 2
    :return: pretty-printed JSON string of the form
        ``{"keyword": ..., "search_results": [{"serial_number", "title",
        "url"}, ...]}``. A page that fails is reported on stdout and
        skipped; results collected so far are kept.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'https://cn.bing.com/'
    }
    result = {'keyword': keyword, 'search_results': []}
    found = result['search_results']

    # One session for the whole crawl: cookies and connections are reused
    # across pages instead of starting from scratch on every request.
    with requests.Session() as session:
        session.headers.update(headers)

        for page in range(1, pages + 1):
            if page > 1:
                # Pause between pages only; no pointless wait after the last.
                time.sleep(_PAGE_DELAY_SECONDS)
            try:
                params = {
                    'q': keyword,
                    'first': 1 + (page - 1) * _RESULTS_PER_PAGE,
                    'FORM': 'PERE',
                }
                resp = session.get(
                    _BING_SEARCH_URL,
                    params=params,
                    timeout=_SEARCH_TIMEOUT,
                    verify=False,  # NOTE(security): TLS verification is disabled.
                )
                # Report HTTP errors instead of parsing an error page as
                # an empty result list.
                resp.raise_for_status()
                soup = BeautifulSoup(resp.text, 'html.parser')

                for item in soup.find_all('li', class_='b_algo'):
                    title_elem = item.find('h2')
                    link_elem = title_elem.find('a') if title_elem else None
                    href = link_elem.get('href') if link_elem else None
                    if not href:
                        continue
                    found.append({
                        'serial_number': len(found) + 1,
                        'title': title_elem.get_text(strip=True),
                        'url': _resolve_real_url(href, session),
                    })
            except Exception as e:
                # Best-effort crawl: one failing page must not abort the rest.
                print(f'Error: {e}')

    # Return the result as JSON.
    return json.dumps(result, ensure_ascii=False, indent=2)


if __name__ == '__main__':
    search_keyword = 'Python 爬虫 最佳实践'
    crawl_result = crawl_bing(search_keyword, pages=2)
    print(crawl_result)
{ "keyword": "Python 爬虫 最佳实践", "search_results": [ { "serial_number": 1, "title": "现在装什么Python的版本? - 知乎", "url": "https://www.zhihu.com/question/1917491857404978513" }, { "serial_number": 2, "title": "如何系统地自学 Python? - 知乎", "url": "https://www.zhihu.com/question/29138020" }, { "serial_number": 3, "title": "CPython是什么?PyPy是什么?Python和这两个东西有什么 ...", "url": "https://www.zhihu.com/question/20005950" }, { "serial_number": 4, "title": "有没有适合新手练习 Python 的做题类网站? - 知乎", "url": "https://www.zhihu.com/question/442492817" }, { "serial_number": 5, "title": "Python 从入门到精通推荐看哪些书籍呢? - 知乎", "url": "https://www.zhihu.com/question/487006622" }, { "serial_number": 6, "title": "你是如何自学 Python 的? - 知乎", "url": "https://www.zhihu.com/question/20702054" }, { "serial_number": 7, "title": "Python|如何安装seaborn?", "url": "https://www.zhihu.com/tardis/bd/ans/3434535423" }, { "serial_number": 8, "title": "如何入门 Python 爬虫? - 知乎", "url": "https://www.zhihu.com/question/20899988" }, { "serial_number": 9, "title": "学 Python 都用来干嘛的? - 知乎", "url": "https://www.zhihu.com/question/34098079" }, { "serial_number": 10, "title": "Python 打包成 exe,太大了该怎么解决? - 知乎", "url": "https://www.zhihu.com/question/281858271" }, { "serial_number": 11, "title": "现在装什么Python的版本? - 知乎", "url": "https://www.zhihu.com/question/1917491857404978513" }, { "serial_number": 12, "title": "如何系统地自学 Python? - 知乎", "url": "https://www.zhihu.com/question/29138020" }, { "serial_number": 13, "title": "CPython是什么?PyPy是什么?Python和这两个东西有什么 ...", "url": "https://www.zhihu.com/question/20005950" }, { "serial_number": 14, "title": "有没有适合新手练习 Python 的做题类网站? - 知乎", "url": "https://www.zhihu.com/question/442492817" }, { "serial_number": 15, "title": "Python 从入门到精通推荐看哪些书籍呢? - 知乎", "url": "https://www.zhihu.com/question/487006622" }, { "serial_number": 16, "title": "你是如何自学 Python 的? 
- 知乎", "url": "https://www.zhihu.com/question/20702054" }, { "serial_number": 17, "title": "Python|如何安装seaborn?", "url": "https://www.zhihu.com/tardis/bd/ans/3434535423" }, { "serial_number": 18, "title": "如何入门 Python 爬虫? - 知乎", "url": "https://www.zhihu.com/question/20899988" }, { "serial_number": 19, "title": "学 Python 都用来干嘛的? - 知乎", "url": "https://www.zhihu.com/question/34098079" }, { "serial_number": 20, "title": "Python 打包成 exe,太大了该怎么解决? - 知乎", "url": "https://www.zhihu.com/question/281858271" } ]}