1. 概念与定义
1.1 什么是推导式?
推导式(Comprehensions)是 Python 提供的一种简洁、高效的创建序列的语法结构。它可以用一行代码替代多行循环语句,让代码更加 Pythonic。
基本语法模式:
[expression for item in iterable if condition]
1.2 四种推导式
1.2.1 列表推导式(List Comprehension)
最常用、最基础的推导式。
基础示例:
# Traditional way: build the list with an explicit loop.
squares = []
for i in range(10):
    squares.append(i ** 2)

# Comprehension way: same result in one expression.
squares = [i ** 2 for i in range(10)]
print(squares)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
带条件的推导式:
# Squares of the even numbers between 1 and 20.
even_squares = [i ** 2 for i in range(1, 21) if i % 2 == 0]
print(even_squares)  # [4, 16, 36, 64, 100, 144, 196, 256, 324, 400]

# A ternary (conditional) expression inside a comprehension.
numbers = [1, -2, 3, -4, 5, -6]
abs_numbers = [x if x > 0 else -x for x in numbers]
print(abs_numbers)  # [1, 2, 3, 4, 5, 6]
1.2.2 字典推导式(Dict Comprehension)
用于快速创建字典。
基础示例:
# Map each number to its square.
square_dict = {x: x ** 2 for x in range(1, 6)}
print(square_dict)  # {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

# Swap keys and values.
original = {'a': 1, 'b': 2, 'c': 3}
reversed_dict = {value: key for key, value in original.items()}
print(reversed_dict)  # {1: 'a', 2: 'b', 3: 'c'}

# Dict comprehension with a filter condition.
scores = {'Alice': 85, 'Bob': 92, 'Charlie': 78, 'David': 95}
passed = {name: score for name, score in scores.items() if score >= 80}
print(passed)  # {'Alice': 85, 'Bob': 92, 'David': 95}
1.2.3 集合推导式(Set Comprehension)
用于创建无重复元素的集合。
基础示例:
# Deduplicate and square in one step.
numbers = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]
unique_squares = {x ** 2 for x in numbers}
print(unique_squares)  # {1, 4, 9, 16}

# Unique characters of a string (spaces excluded).
text = "hello world"
unique_chars = {char for char in text if char != ' '}
print(unique_chars)  # {'h', 'e', 'l', 'o', 'w', 'r', 'd'}
1.2.4 生成器表达式(Generator Expression,也常被称为生成器推导式)
使用圆括号,返回生成器对象,节省内存。
基础示例:
# A generator expression: values are not computed up front.
squares_gen = (x ** 2 for x in range(1000000))
print(type(squares_gen))  # <class 'generator'>

# Pull values on demand.
print(next(squares_gen))  # 0
print(next(squares_gen))  # 1
print(next(squares_gen))  # 4

# Memory comparison against a list comprehension.
import sys

list_comp = [x ** 2 for x in range(1000)]
gen_comp = (x ** 2 for x in range(1000))
print(f"列表推导式内存: {sys.getsizeof(list_comp)} bytes")
print(f"生成器推导式内存: {sys.getsizeof(gen_comp)} bytes")
1.3 嵌套推导式
推导式嵌套的求值顺序遵循 从左到右、从外到内 的原则。理解这个顺序对于写出正确的代码至关重要。
嵌套列表推导式:
# Flatten a 2-D list.
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flattened = [num for row in matrix for num in row]
print(flattened)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Build a multiplication table.
multiplication_table = [[i * j for j in range(1, 6)] for i in range(1, 6)]
for row in multiplication_table:
    print(row)
# Output:
# [1, 2, 3, 4, 5]
# [2, 4, 6, 8, 10]
# [3, 6, 9, 12, 15]
# [4, 8, 12, 16, 20]
# [5, 10, 15, 20, 25]
求值示例:
# Nested clauses read left to right: the outer loop comes first.
# NOTE: relies on `matrix` being defined by the preceding example.
flattened = [num for row in matrix for num in row]
等价于以下嵌套循环
# NOTE: relies on `matrix` being defined by the preceding example.
flattened_loop = []
for row in matrix:          # outer loop runs first
    for num in row:         # inner loop runs second
        flattened_loop.append(num)
嵌套字典推导式:
# Build a nested user -> {score, status} dictionary.
users = ['Alice', 'Bob', 'Charlie']
scores = [85, 92, 78]

# One nested dict per (user, score) pair from zip().
user_data = {
    user: {
        'score': score,
        'status': 'passed' if score >= 80 else 'failed',
    }
    for user, score in zip(users, scores)
}
print(user_data)
# Output:
# {
#   'Alice': {'score': 85, 'status': 'passed'},
#   'Bob': {'score': 92, 'status': 'passed'},
#   'Charlie': {'score': 78, 'status': 'failed'}
# }
2. 实用技巧
2.1 性能优化技巧
技巧1:使用生成器表达式处理大数据
# Bad: materializes the whole list before summing, wasting memory.
def sum_squares_bad(n):
    """Sum of squares below n, built via a full intermediate list."""
    return sum([x ** 2 for x in range(n)])


# Good: a generator expression feeds sum() one value at a time.
def sum_squares_good(n):
    """Sum of squares below n, streamed through a generator expression."""
    return sum(x ** 2 for x in range(n))


# Performance / memory comparison.
import time
import tracemalloc


def compare_methods():
    """Compare speed and peak memory of the two approaches.

    Fix: the original decorated this with the third-party
    `memory_profiler.profile`; stdlib `tracemalloc` is used instead so
    the demo has no external dependency.
    """
    n = 10 ** 7

    # List-comprehension version.
    tracemalloc.start()
    start = time.time()
    result1 = sum([x ** 2 for x in range(n)])
    time1 = time.time() - start
    _, peak1 = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    # Generator version.
    tracemalloc.start()
    start = time.time()
    result2 = sum(x ** 2 for x in range(n))
    time2 = time.time() - start
    _, peak2 = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    print(f"列表推导式: {time1:.2f}秒")
    print(f"生成器推导式: {time2:.2f}秒")
技巧2:使用集合推导式去重优化
# Find the elements two lists have in common.
def find_common_elements(list1, list2):
    """Return the set of elements present in both lists.

    The traditional loop version is shown for comparison only; the
    set-comprehension result is what is returned.
    """
    # Traditional way: O(n*m) scans plus a membership test per item.
    common = []
    for item in list1:
        if item in list2 and item not in common:
            common.append(item)

    # Optimized: build the lookup set once, then a set comprehension.
    # (Fix: `item in list2` was an O(m) scan per element.)
    lookup = set(list2)
    common_optimized = {item for item in list1 if item in lookup}
    return common_optimized


# Performance-test fixtures.
import random

list1 = [random.randint(1, 1000) for _ in range(10000)]
list2 = [random.randint(1, 1000) for _ in range(10000)]
2.2 代码简化技巧
技巧3:替代map和filter函数
# With map and filter.
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
squared = list(map(lambda x: x ** 2, numbers))
evens = list(filter(lambda x: x % 2 == 0, numbers))

# With comprehensions (clearer).
squared = [x ** 2 for x in numbers]
evens = [x for x in numbers if x % 2 == 0]

# map combined with filter.
result = list(map(lambda x: x ** 2, filter(lambda x: x % 2 == 0, numbers)))

# Comprehension equivalent.
result = [x ** 2 for x in numbers if x % 2 == 0]
技巧4:处理多个序列
# Process several lists in lockstep with zip().
names = ['Alice', 'Bob', 'Charlie']
ages = [25, 30, 35]
cities = ['New York', 'London', 'Tokyo']

# Build a description per person.
people = [
    f"{name} is {age} years old and lives in {city}"
    for name, age, city in zip(names, ages, cities)
]

# Combine filter conditions.
valid_people = [
    (name, age)
    for name, age in zip(names, ages)
    if age >= 30 and name.startswith('C')
]
2.3 数据处理技巧
技巧5:数据清洗和转换
# Clean data: strip whitespace, normalize case, drop empty values.
raw_data = [' Alice ', 'BOB', ' ', 'Charlie', None, 'david']
cleaned_data = [
    name.strip().title()
    for name in raw_data
    if name and name.strip()
]
print(cleaned_data)  # ['Alice', 'Bob', 'Charlie', 'David']

# More involved record transformation.
records = [
    {'name': 'Alice', 'age': '25', 'salary': '50000'},
    {'name': 'Bob', 'age': '30', 'salary': '60000'},
    {'name': 'Charlie', 'age': '35', 'salary': '70000'},
]
processed = [
    {
        'name': rec['name'],
        'age': int(rec['age']),
        'salary': float(rec['salary']),
        'tax': float(rec['salary']) * 0.2,
    }
    for rec in records
]
技巧6:矩阵操作
# Matrix transpose.
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
transpose = [[row[i] for row in matrix] for i in range(len(matrix[0]))]
print(transpose)  # [[1, 4, 7], [2, 5, 8], [3, 6, 9]]

# Main-diagonal elements.
diagonal = [matrix[i][i] for i in range(len(matrix))]
print(diagonal)  # [1, 5, 9]

# Filter each row by a condition.
positive_matrix = [
    [x for x in row if x > 0]
    for row in matrix
]
3. 应用示例
示例1:学生成绩分析系统
案例描述: 开发一个学生成绩分析系统,处理包含多个维度的成绩数据,使用推导式进行高效的数据处理。
import statistics
from typing import List, Dict, Any


class StudentGradeAnalyzer:
    """Student grade analyzer: per-student, per-class and at-risk reports."""

    def __init__(self, students_data: List[Dict[str, Any]]):
        """
        Initialize with student records shaped like:

        students_data: [
            {'name': 'Alice', 'grades': [85, 90, 88, 92], 'class': 'A'},
            {'name': 'Bob', 'grades': [78, 82, 85, 80], 'class': 'B'},
            ...
        ]
        """
        self.students = students_data

    def calculate_averages(self) -> List[Dict[str, Any]]:
        """Return one summary dict (average/max/min/passed) per student."""
        summaries = []
        for student in self.students:
            # Compute the mean once (the original computed it twice).
            avg = statistics.mean(student['grades'])
            summaries.append({
                'name': student['name'],
                'class': student['class'],
                'average': avg,
                'max_grade': max(student['grades']),
                'min_grade': min(student['grades']),
                'passed': avg >= 60,
            })
        return summaries

    def get_top_performers(self, n: int = 3) -> List[Dict[str, Any]]:
        """Return the n students with the highest averages."""
        averages = self.calculate_averages()
        return sorted(averages, key=lambda x: x['average'], reverse=True)[:n]

    def analyze_by_class(self) -> Dict[str, Any]:
        """Aggregate statistics per class (count, mean, pass rate, ranking)."""
        # Group students by class name.
        class_groups: Dict[str, List[Dict[str, Any]]] = {}
        for student in self.students:
            class_groups.setdefault(student['class'], []).append(student)

        return {
            class_name: {
                'student_count': len(students),
                'average_score': statistics.mean(
                    statistics.mean(s['grades']) for s in students
                ),
                'pass_rate': len([
                    s for s in students
                    if statistics.mean(s['grades']) >= 60
                ]) / len(students) * 100,
                'highest_score': max(
                    max(s['grades']) for s in students
                ),
                'students': [
                    {
                        'name': s['name'],
                        'average': statistics.mean(s['grades']),
                    }
                    for s in sorted(
                        students,
                        key=lambda x: statistics.mean(x['grades']),
                        reverse=True,
                    )
                ],
            }
            for class_name, students in class_groups.items()
        }

    def find_students_needing_help(self, threshold: float = 60) -> List[Dict]:
        """Students whose average is below `threshold` or who fail a subject."""
        at_risk = []
        for student in self.students:
            avg = statistics.mean(student['grades'])
            failing = [g for g in student['grades'] if g < 60]
            if avg < threshold or failing:
                at_risk.append({
                    'name': student['name'],
                    'average': avg,
                    'failing_subjects': len(failing),
                    'grades': student['grades'],
                })
        return at_risk


def run_grade_analyzer():
    """Demo driver: run the analyzer over a small fixed data set."""
    sample_data = [
        {'name': 'Alice', 'grades': [85, 90, 88, 92], 'class': 'A'},
        {'name': 'Bob', 'grades': [78, 82, 85, 80], 'class': 'B'},
        {'name': 'Charlie', 'grades': [92, 88, 95, 89], 'class': 'A'},
        {'name': 'David', 'grades': [65, 70, 68, 72], 'class': 'B'},
        {'name': 'Eve', 'grades': [45, 55, 60, 50], 'class': 'A'},
        {'name': 'Frank', 'grades': [88, 85, 90, 92], 'class': 'C'},
    ]

    analyzer = StudentGradeAnalyzer(sample_data)

    print("=== 学生平均成绩 ===")
    for student in analyzer.calculate_averages():
        print(f"{student['name']}: {student['average']:.1f} "
              f"(passed: {student['passed']})")

    print("\n=== 班级分析 ===")
    class_analysis = analyzer.analyze_by_class()
    for class_name, stats in class_analysis.items():
        print(f"\n班级 {class_name}:")
        print(f"  人数: {stats['student_count']}")
        print(f"  平均分: {stats['average_score']:.1f}")
        print(f"  及格率: {stats['pass_rate']:.1f}%")
        print(f"  最高分: {stats['highest_score']}")

    print("\n=== 需要帮助的学生 ===")
    for student in analyzer.find_students_needing_help():
        print(f"{student['name']}: 平均分 {student['average']:.1f}, "
              f"不及格科目数 {student['failing_subjects']}")


if __name__ == "__main__":
    run_grade_analyzer()
示例2:日志分析与监控系统
案例描述: 开发一个日志分析系统,处理大量日志数据,使用推导式进行快速过滤和分析。
import re
from datetime import datetime
from collections import Counter
from typing import Any, List, Dict, Generator
import json


class LogAnalyzer:
    """Log analyzer: parse, filter and summarize structured log lines."""

    def __init__(self, log_entries: List[str]):
        self.raw_logs = log_entries
        self.parsed_logs = self._parse_logs()

    def _parse_logs(self) -> List[Dict]:
        """Parse lines shaped '<ts> - <LEVEL> - <message>[ - <json>]'."""
        log_pattern = re.compile(
            r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) - '
            r'(\w+) - (.*?)(?: - (\{.*\}))?$'
        )
        # `for match in [...] if match` keeps only lines the pattern accepts.
        return [
            {
                'timestamp': datetime.strptime(match.group(1),
                                               '%Y-%m-%d %H:%M:%S'),
                'level': match.group(2),
                'message': match.group(3),
                'metadata': json.loads(match.group(4)) if match.group(4) else {},
            }
            for log in self.raw_logs
            for match in [log_pattern.match(log)]
            if match
        ]

    def filter_by_level(self, level: str) -> List[Dict]:
        """Logs whose level equals `level` exactly."""
        return [log for log in self.parsed_logs if log['level'] == level]

    def filter_by_time_range(self, start_time: datetime,
                             end_time: datetime) -> List[Dict]:
        """Logs with start_time <= timestamp <= end_time."""
        return [
            log for log in self.parsed_logs
            if start_time <= log['timestamp'] <= end_time
        ]

    def get_error_summary(self) -> Dict[str, Any]:
        """Aggregate view of ERROR logs (counts, types, recent samples)."""
        error_logs = self.filter_by_level('ERROR')
        return {
            'total_errors': len(error_logs),
            'error_types': Counter(
                self._extract_error_type(log['message'])
                for log in error_logs
            ),
            'errors_by_hour': Counter(
                log['timestamp'].hour for log in error_logs
            ),
            'recent_errors': [
                {
                    'time': log['timestamp'].strftime('%H:%M:%S'),
                    'message': log['message'][:50] + '...',
                }
                for log in sorted(error_logs,
                                  key=lambda x: x['timestamp'],
                                  reverse=True)[:5]
            ],
        }

    def _extract_error_type(self, message: str) -> str:
        """Classify an error message by the first known keyword it contains."""
        common_errors = ['Timeout', 'Connection', 'Database', 'Memory', 'IO']
        for error in common_errors:
            if error in message:
                return error
        return 'Other'

    def search_logs(self, keyword: str,
                    case_sensitive: bool = False) -> Generator:
        """Lazily yield logs whose message contains `keyword`.

        Fix: the original lower-cased the haystack unconditionally and had
        an unreachable second return, so case_sensitive=True had no effect.
        """
        if not case_sensitive:
            needle = keyword.lower()
            return (
                log for log in self.parsed_logs
                if needle in log['message'].lower()
            )
        return (
            log for log in self.parsed_logs
            if keyword in log['message']
        )

    def generate_statistics(self) -> Dict:
        """Overall statistics: volumes, IPs, endpoints, hourly activity."""
        return {
            'total_logs': len(self.parsed_logs),
            'unique_ips': len(set(
                meta.get('ip')
                for log in self.parsed_logs
                for meta in [log['metadata']]
                if 'ip' in meta
            )),
            'level_distribution': dict(Counter(
                log['level'] for log in self.parsed_logs
            )),
            'top_endpoints': Counter(
                meta.get('endpoint', 'unknown')
                for log in self.parsed_logs
                for meta in [log['metadata']]
                if 'endpoint' in meta
            ).most_common(5),
            'response_time_stats': self._calculate_response_times(),
            'hourly_activity': [
                {
                    'hour': hour,
                    'count': len([l for l in self.parsed_logs
                                  if l['timestamp'].hour == hour]),
                    'errors': len([l for l in self.parsed_logs
                                   if l['timestamp'].hour == hour
                                   and l['level'] == 'ERROR']),
                }
                for hour in range(24)
            ],
        }

    def _calculate_response_times(self) -> Dict:
        """Avg/max/min/p95 of metadata 'response_time' values, if any."""
        response_times = [
            meta['response_time']
            for log in self.parsed_logs
            for meta in [log['metadata']]
            if 'response_time' in meta
        ]
        if response_times:
            return {
                'avg': sum(response_times) / len(response_times),
                'max': max(response_times),
                'min': min(response_times),
                'p95': sorted(response_times)[
                    int(len(response_times) * 0.95)],
            }
        return {}


def run_log_analyzer():
    """Demo driver over a handful of synthetic log lines."""
    sample_logs = [
        "2024-01-15 08:23:45 - INFO - User logged in - {\"ip\": \"192.168.1.1\", \"user_id\": 123}",
        "2024-01-15 08:24:12 - INFO - Page accessed - {\"ip\": \"192.168.1.2\", \"endpoint\": \"/api/users\", \"response_time\": 145}",
        "2024-01-15 08:25:30 - ERROR - Database connection timeout - {\"ip\": \"192.168.1.1\", \"endpoint\": \"/api/data\"}",
        "2024-01-15 08:26:45 - WARNING - High memory usage - {\"usage\": 85}",
        "2024-01-15 08:27:18 - ERROR - Connection refused - {\"ip\": \"192.168.1.3\", \"endpoint\": \"/api/service\"}",
        "2024-01-15 08:28:02 - INFO - File uploaded - {\"ip\": \"192.168.1.4\", \"file_size\": 2048}",
        "2024-01-15 08:29:33 - ERROR - Timeout error - {\"ip\": \"192.168.1.2\", \"endpoint\": \"/api/process\"}",
    ]

    analyzer = LogAnalyzer(sample_logs)

    print("=== 日志统计信息 ===")
    stats = analyzer.generate_statistics()
    print(f"总日志数: {stats['total_logs']}")
    print(f"唯一IP数: {stats['unique_ips']}")
    print(f"日志级别分布: {stats['level_distribution']}")
    print(f"热门端点: {stats['top_endpoints']}")

    print("\n=== 错误摘要 ===")
    error_summary = analyzer.get_error_summary()
    print(f"错误总数: {error_summary['total_errors']}")
    print(f"错误类型: {dict(error_summary['error_types'])}")
    print("最近错误:")
    for error in error_summary['recent_errors']:
        print(f"  {error['time']} - {error['message']}")

    print("\n=== 搜索包含'connection'的日志 ===")
    for log in analyzer.search_logs('connection'):
        print(f"[{log['level']}] {log['message']}")


if __name__ == "__main__":
    run_log_analyzer()
示例3:数据清洗与转换ETL工具
案例描述: 构建一个数据处理工具,使用推导式进行复杂的数据清洗、转换和验证。
import re
import hashlib
from typing import List, Dict, Any, Callable, Union
from datetime import datetime
import json


class DataETLPipeline:
    """ETL pipeline: chainable clean/transform/validate/filter/aggregate."""

    def __init__(self, data: List[Dict]):
        self.original_data = data
        self.transformed_data = data.copy()
        self.validation_errors = []

    def clean_data(self, cleaning_rules: Dict[str, List[Callable]]) -> 'DataETLPipeline':
        """Apply the per-field cleaning rules to every record."""
        self.transformed_data = [
            {
                key: self._apply_cleaning_rules(value, cleaning_rules.get(key, []))
                for key, value in record.items()
            }
            for record in self.transformed_data
        ]
        return self

    def _apply_cleaning_rules(self, value: Any, rules: List[Callable]) -> Any:
        """Run each rule in order; log (not raise) any rule failure."""
        for rule in rules:
            try:
                value = rule(value)
            except Exception as e:
                self.validation_errors.append(
                    f"清洗规则失败: {rule.__name__} - {str(e)}")
        return value

    def transform_data(self, transformations: Dict[str, Callable]) -> 'DataETLPipeline':
        """Derive new fields; each function receives the whole record."""
        self.transformed_data = [
            {
                **record,
                **{
                    new_key: transform_func(record)
                    for new_key, transform_func in transformations.items()
                },
            }
            for record in self.transformed_data
        ]
        return self

    def validate_data(self, validation_rules: Dict[str, Callable]) -> 'DataETLPipeline':
        """Keep only records for which every field validator passes."""
        valid_records = []
        for record in self.transformed_data:
            is_valid = all(
                validator(record.get(field))
                for field, validator in validation_rules.items()
            )
            if is_valid:
                valid_records.append(record)
            else:
                self.validation_errors.append(f"记录验证失败: {record}")
        self.transformed_data = valid_records
        return self

    def filter_data(self, condition: Callable[[Dict], bool]) -> 'DataETLPipeline':
        """Keep only records matching `condition`."""
        self.transformed_data = [
            record for record in self.transformed_data
            if condition(record)
        ]
        return self

    def aggregate_data(self, group_by: str,
                       aggregations: Dict[str, Callable]) -> List[Dict]:
        """Group records by a field and apply the aggregation functions."""
        groups: Dict[Any, List[Dict]] = {}
        for record in self.transformed_data:
            groups.setdefault(record.get(group_by), []).append(record)

        return [
            {
                group_by: group_key,
                **{
                    agg_name: agg_func(group_records)
                    for agg_name, agg_func in aggregations.items()
                },
            }
            for group_key, group_records in groups.items()
        ]

    def extract_metrics(self, metrics: Dict[str, Callable]) -> Dict:
        """Evaluate each metric function over the transformed data set."""
        return {
            metric_name: metric_func(self.transformed_data)
            for metric_name, metric_func in metrics.items()
        }

    def get_summary(self) -> Dict:
        """Processing summary: counts, error samples, field statistics."""
        return {
            'original_count': len(self.original_data),
            'transformed_count': len(self.transformed_data),
            'validation_errors': len(self.validation_errors),
            'error_samples': self.validation_errors[:5],
            'field_statistics': self._generate_field_stats(),
        }

    def _generate_field_stats(self) -> Dict:
        """Per-field type / cardinality / missing-count statistics."""
        if not self.transformed_data:
            return {}
        return {
            field: {
                'type': type(self.transformed_data[0][field]).__name__,
                'unique_values': len(set(
                    record[field] for record in self.transformed_data
                )),
                'missing': len([
                    r for r in self.transformed_data
                    if r.get(field) in (None, '', [])
                ]),
                'sample_values': [
                    record[field] for record in self.transformed_data[:3]
                ],
            }
            for field in self.transformed_data[0].keys()
        }


class DataTransformations:
    """Reusable cleaning / conversion helpers."""

    @staticmethod
    def strip_whitespace(value: str) -> str:
        """Strip surrounding whitespace (strings only)."""
        return value.strip() if isinstance(value, str) else value

    @staticmethod
    def to_lowercase(value: str) -> str:
        """Lower-case strings; pass anything else through."""
        return value.lower() if isinstance(value, str) else value

    @staticmethod
    def to_uppercase(value: str) -> str:
        """Upper-case strings; pass anything else through."""
        return value.upper() if isinstance(value, str) else value

    @staticmethod
    def to_date(value: str, format: str = '%Y-%m-%d') -> datetime:
        """Parse a date string; return the input unchanged on failure."""
        if isinstance(value, str):
            try:
                return datetime.strptime(value, format)
            except ValueError:  # fix: was a bare except
                return value
        return value

    @staticmethod
    def to_float(value: Union[str, int, float]) -> float:
        """Convert to float, defaulting to 0.0 on failure."""
        try:
            return float(value)
        except (ValueError, TypeError):
            return 0.0

    @staticmethod
    def mask_email(email: str) -> str:
        """Mask the local part of an email (keep the first 3 characters)."""
        if '@' not in email:
            return email
        local, domain = email.split('@')
        masked_local = (local[:3] + '*' * (len(local) - 3)
                        if len(local) > 3 else local)
        return f"{masked_local}@{domain}"

    @staticmethod
    def hash_value(value: str, algorithm: str = 'md5') -> str:
        """Short (8 hex chars) digest of `value`.

        Fix: the original ignored `algorithm` and always used MD5;
        hashlib.new() honours it while keeping the md5 default.
        """
        if not isinstance(value, str):
            value = str(value)
        return hashlib.new(algorithm, value.encode()).hexdigest()[:8]


def run_etl_pipeline():
    """Demo driver: clean, transform, validate, filter, then report."""
    raw_data = [
        {'name': ' Alice Smith ', 'email': 'ALICE@EXAMPLE.COM', 'age': '25',
         'salary': '50000.50', 'join_date': '2023-01-15'},
        {'name': 'Bob Johnson', 'email': 'bob@example.com', 'age': '30',
         'salary': '60000.75', 'join_date': '2023-02-20'},
        {'name': 'Charlie Brown', 'email': '', 'age': '35',
         'salary': 'invalid', 'join_date': '2023-03-10'},
        {'name': 'Diana Prince', 'email': 'diana@example.com', 'age': '28',
         'salary': '55000.25', 'join_date': '2023-04-05'},
        {'name': ' ', 'email': 'invalid-email', 'age': 'forty',
         'salary': '70000.00', 'join_date': '2023-05-12'},
    ]

    etl = DataETLPipeline(raw_data)

    # Per-field cleaning rules, applied in order.
    cleaning_rules = {
        'name': [
            DataTransformations.strip_whitespace,
            lambda x: x if x and x.strip() else 'Unknown',
        ],
        'email': [
            DataTransformations.strip_whitespace,
            DataTransformations.to_lowercase,
            lambda x: x if '@' in x else None,
        ],
        'age': [
            DataTransformations.to_float,
            lambda x: x if 0 < x < 120 else None,
        ],
    }

    # Derived-field transformations (each receives the whole record).
    transformations = {
        'email_masked': lambda r: DataTransformations.mask_email(r.get('email', '')),
        'id_hash': lambda r: DataTransformations.hash_value(
            f"{r.get('name')}{r.get('age')}"),
        'salary_float': lambda r: DataTransformations.to_float(r.get('salary', 0)),
        'join_year': lambda r: DataTransformations.to_date(r.get('join_date')).year
        if DataTransformations.to_date(r.get('join_date')) else None,
        'seniority': lambda r: 'Senior'
        if DataTransformations.to_float(r.get('age', 0)) > 30 else 'Junior',
    }

    # Record-level validation rules.
    validation_rules = {
        'name': lambda x: x and x != 'Unknown',
        'email': lambda x: x and '@' in x,
        'age': lambda x: isinstance(x, (int, float)) and 0 < x < 120,
    }

    result = (etl
              .clean_data(cleaning_rules)
              .transform_data(transformations)
              .validate_data(validation_rules)
              .filter_data(lambda r: r.get('age', 0) >= 25))

    print("=== 清洗后的数据 ===")
    for record in result.transformed_data:
        print(json.dumps(record, indent=2, default=str))

    print("\n=== 处理摘要 ===")
    summary = result.get_summary()
    print(f"原始记录数: {summary['original_count']}")
    print(f"处理后记录数: {summary['transformed_count']}")
    print(f"验证错误数: {summary['validation_errors']}")

    print("\n=== 按资历分组 ===")
    aggregations = result.aggregate_data(
        group_by='seniority',
        aggregations={
            'count': lambda g: len(g),
            'avg_age': lambda g: sum(r['age'] for r in g) / len(g),
            'avg_salary': lambda g: sum(r['salary_float'] for r in g) / len(g),
            'names': lambda g: [r['name'] for r in g],
        },
    )
    for group in aggregations:
        print(json.dumps(group, indent=2))

    metrics = result.extract_metrics({
        'total_records': lambda d: len(d),
        'avg_age': lambda d: sum(r['age'] for r in d) / len(d) if d else 0,
        'total_salary': lambda d: sum(r['salary_float'] for r in d),
        'unique_domains': lambda d: len(set(
            r['email'].split('@')[1] for r in d if r.get('email')
        )),
    })
    print("\n=== 指标统计 ===")
    print(json.dumps(metrics, indent=2))


if __name__ == "__main__":
    run_etl_pipeline()
4. 注意事项
4.1 性能对比示例代码
import timeit
import sys


def performance_comparison(number=10000, nested_number=100000):
    """比较不同方法的性能 — time loops vs comprehensions vs map/filter.

    Args:
        number: timeit repetition count for the flat-list tests
            (default matches the original hard-coded 10000).
        nested_number: repetition count for the nested-loop test
            (default matches the original hard-coded 100000).
    """
    # Test 1: build the list of squares of 0..999.
    print("=== 测试1:创建1-1000的平方列表 ===")

    def traditional():
        result = []
        for i in range(1000):
            result.append(i ** 2)
        return result

    def comprehension():
        return [i ** 2 for i in range(1000)]

    def map_func():
        return list(map(lambda x: x ** 2, range(1000)))

    time_traditional = timeit.timeit(traditional, number=number)
    time_comprehension = timeit.timeit(comprehension, number=number)
    time_map = timeit.timeit(map_func, number=number)

    print(f"传统循环: {time_traditional:.4f}秒")
    print(f"列表推导式: {time_comprehension:.4f}秒")
    print(f"map函数: {time_map:.4f}秒")

    # Test 2: filter evens, then square.
    print("\n=== 测试2:过滤偶数并计算平方 ===")

    def traditional_filter():
        result = []
        for i in range(1000):
            if i % 2 == 0:
                result.append(i ** 2)
        return result

    def comprehension_filter():
        return [i ** 2 for i in range(1000) if i % 2 == 0]

    def filter_map():
        return list(map(lambda x: x ** 2,
                        filter(lambda x: x % 2 == 0, range(1000))))

    time_traditional = timeit.timeit(traditional_filter, number=number)
    time_comprehension = timeit.timeit(comprehension_filter, number=number)
    time_filter_map = timeit.timeit(filter_map, number=number)

    print(f"传统循环: {time_traditional:.4f}秒")
    print(f"列表推导式: {time_comprehension:.4f}秒")
    print(f"filter+map: {time_filter_map:.4f}秒")

    # Test 3: nested loops building a 5x5 multiplication table.
    print("\n=== 测试3:创建5x5乘法表 ===")

    def traditional_nested():
        result = []
        for i in range(1, 6):
            row = []
            for j in range(1, 6):
                row.append(i * j)
            result.append(row)
        return result

    def comprehension_nested():
        return [[i * j for j in range(1, 6)] for i in range(1, 6)]

    time_traditional = timeit.timeit(traditional_nested, number=nested_number)
    time_comprehension = timeit.timeit(comprehension_nested, number=nested_number)

    print(f"传统嵌套循环: {time_traditional:.4f}秒")
    print(f"嵌套推导式: {time_comprehension:.4f}秒")


if __name__ == "__main__":
    performance_comparison()
4.2 常见陷阱
class ComprehensionPitfalls:
    """Common comprehension pitfalls, one static demo per trap."""

    @staticmethod
    def trap1_variable_leakage():
        """Trap 1: loop-variable leakage (Python 2 vs Python 3)."""
        # In Python 3 the comprehension variable does not leak out.
        x = 'outer'
        squares = [x ** 2 for x in range(5)]
        print(f"外部x: {x}")  # still 'outer', not 4

        # Still worth being careful when reusing names.
        i = 10
        data = [i * 2 for i in range(3)]
        print(f"外部i: {i}")  # still 10

    @staticmethod
    def trap2_memory_explosion():
        """Trap 2: materializing a huge list at once."""
        # Bad: builds the whole list in memory.
        def dangerous():
            return [x ** 2 for x in range(10 ** 7)]

        # Good: a generator expression yields lazily.
        def safe():
            return (x ** 2 for x in range(10 ** 7))

    @staticmethod
    def trap3_readability_issues():
        """Trap 3: over-dense comprehensions hurt readability."""
        matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

        # Hard to read: two loops plus two conditions in one expression.
        result = [x for row in matrix for x in row if x % 2 == 0 and x > 4]

        # Easier to read: split into two steps.
        flattened = [x for row in matrix for x in row]
        filtered = [x for x in flattened if x % 2 == 0 and x > 4]

        print(f"糟糕的方式: {result}")
        print(f"更好的方式: {filtered}")

    @staticmethod
    def trap4_condition_placement():
        """Trap 4: filtering `if` vs value-selecting ternary."""
        data = [1, -2, 3, -4, 5, -6]

        # The trailing `if` filters items; the ternary picks the value.
        wrong = [x if x > 0 else 0 for x in data if x % 2 == 0]
        print(f"错误结果: {wrong}")  # evens filtered first, then mapped

        # Parentheses make the ternary's scope explicit (same result;
        # they only aid readability).
        correct = [(x if x > 0 else 0) for x in data if x % 2 == 0]
        print(f"正确结果: {correct}")

    @staticmethod
    def trap5_side_effects():
        """Trap 5: using a comprehension for its side effects."""
        # Bad: a list of Nones built purely for the append side effect.
        def bad_example():
            processed = []
            [processed.append(x ** 2) for x in range(5)]  # misuse
            return processed

        # Good: comprehensions are for building values.
        def good_example():
            return [x ** 2 for x in range(5)]

        # When side effects are needed, use an explicit loop.
        def side_effect_needed():
            processed = []
            for x in range(5):
                value = x ** 2
                processed.append(value)
                print(f"处理: {value}")  # the side effect
            return processed


def best_practices():
    """Comprehension best practices, as runnable examples."""
    # 1. Keep it simple (KISS).
    squares = [x ** 2 for x in range(10)]

    # Bad: too many clauses in one comprehension.
    complex_data = [
        [x * y for y in range(5) if y % 2 == 0]
        for x in range(5)
        if x > 2
        for z in [1, 2]
        if z < 3
    ]

    # 2. Split when a comprehension gets dense.
    def split_comprehension(data):
        # Unsplit version.
        result1 = [x ** 2 for row in data for x in row
                   if x % 2 == 0 and x ** 2 > 10]

        # Split version.
        flattened = [x for row in data for x in row]
        evens = [x for x in flattened if x % 2 == 0]
        result2 = [x ** 2 for x in evens if x ** 2 > 10]
        return result2

    # 3. Use meaningful names.
    # Fix: the original referenced undefined names (r, raw_data) and
    # raised NameError when called from __main__; sample data added.
    raw_data = [3, -1, 0, 7, -5]
    # Bad name.
    d = [x for x in raw_data if x > 0]
    # Good name.
    positive_numbers = [num for num in raw_data if num > 0]

    # 4. Prefer built-ins where they fit.
    numbers = [1, 2, 3, 4, 5]
    total1 = sum([x for x in numbers])  # needless intermediate list
    total2 = sum(x for x in numbers)    # generator expression
    total3 = sum(numbers)               # simplest

    # 5. Document complex comprehensions.
    def complex_transformation(data):
        """转换数据结构:
        从原始数据创建标准化的用户字典
        """
        return [
            {
                'id': idx,
                'name': item.get('name', '').strip().title(),
                'email': item.get('email', '').lower(),
                'age': int(item.get('age', 0))
                if item.get('age', '').isdigit() else 0,
                'active': item.get('status') == 'active',
            }
            for idx, item in enumerate(data)
            if item.get('name')  # skip records without a name
        ]


if __name__ == "__main__":
    pitfalls = ComprehensionPitfalls()
    pitfalls.trap1_variable_leakage()
    pitfalls.trap3_readability_issues()
    pitfalls.trap4_condition_placement()
    best_practices()
5. 练习推荐
5.1 基础练习
# Exercise 1: string processing.
def string_exercises():
    """字符串处理练习"""
    words = ["Python", "is", "awesome", "and", "powerful"]

    # 1.1 Upper-case every word.
    uppercase_words = [word.upper() for word in words]
    print(f"大写单词: {uppercase_words}")

    # 1.2 Keep only words longer than 3 characters.
    long_words = [word for word in words if len(word) > 3]
    print(f"长单词: {long_words}")

    # 1.3 Map each word to its length.
    word_lengths = {word: len(word) for word in words}
    print(f"单词长度: {word_lengths}")

    # 1.4 Collect every vowel that appears.
    vowels = {char for word in words for char in word
              if char.lower() in 'aeiou'}
    print(f"元音字母: {vowels}")


# Exercise 2: numeric processing.
def numeric_exercises():
    """数值处理练习"""
    numbers = [1, 5, -3, 7, -2, 8, -1, 4, 0, 6]

    # 2.1 Squares of the positive numbers.
    positive_squares = [x ** 2 for x in numbers if x > 0]
    print(f"正数平方: {positive_squares}")

    # 2.2 Count positives and negatives.
    positive_count = len([x for x in numbers if x > 0])
    negative_count = len([x for x in numbers if x < 0])
    print(f"正数: {positive_count}, 负数: {negative_count}")

    # 2.3 Classify numbers into a dict of lists.
    classified = {
        'positive': [x for x in numbers if x > 0],
        'negative': [x for x in numbers if x < 0],
        'zero': [x for x in numbers if x == 0],
    }
    print(f"分类结果: {classified}")


# Exercise 3: file-name processing.
def file_exercises():
    """文件处理练习"""
    # Fix: the original's `import os` was never used and has been removed.
    files = [
        "document.txt", "image.jpg", "data.csv",
        "script.py", "notes.txt", "photo.png",
    ]

    # 3.1 Group files by extension.
    extensions = {file.split('.')[-1] for file in files if '.' in file}
    grouped = {
        ext: [file for file in files if file.endswith(ext)]
        for ext in extensions
    }
    print(f"文件分组: {grouped}")

    # 3.2 All text-like files.
    text_files = [file for file in files if file.endswith(('.txt', '.csv'))]
    print(f"文本文件: {text_files}")
5.2 中级练习
练习4:股票数据分析器
import random
from datetime import datetime, timedelta


class StockAnalyzer:
    """Stock data analyzer over flat OHLCV records."""

    def __init__(self, stock_data):
        """
        stock_data: [
            {'symbol': 'AAPL', 'date': '2024-01-15', 'open': 150.0,
             'high': 155.0, 'low': 149.0, 'close': 154.0, 'volume': 1000000},
            ...
        ]
        """
        self.data = stock_data

    def calculate_daily_returns(self):
        """Per-record intraday return and volatility, in percent."""
        return [
            {
                'symbol': item['symbol'],
                'date': item['date'],
                'return': (item['close'] - item['open']) / item['open'] * 100,
                'volatility': (item['high'] - item['low']) / item['open'] * 100,
            }
            for item in self.data
        ]

    def find_best_performers(self, top_n=5):
        """Records with the highest intraday return."""
        daily_returns = self.calculate_daily_returns()
        return sorted(daily_returns,
                      key=lambda x: x['return'], reverse=True)[:top_n]

    def calculate_moving_average(self, symbol, window=5):
        """Trailing moving average of the close for one symbol.

        Fix: the original indexed a list slice with ['close'] (a TypeError
        at runtime) and divided by `window` even when fewer than `window`
        points exist; this version averages the actual trailing slice.
        """
        symbol_data = [item for item in self.data if item['symbol'] == symbol]
        symbol_data.sort(key=lambda x: x['date'])
        result = []
        for idx, item in enumerate(symbol_data):
            trailing = symbol_data[max(0, idx - window + 1):idx + 1]
            result.append({
                'date': item['date'],
                'close': item['close'],
                'ma': sum(d['close'] for d in trailing) / len(trailing),
            })
        return result

    def get_statistics(self):
        """Per-symbol aggregates (avg close, extremes, total volume)."""
        # Group once instead of re-scanning self.data per statistic.
        by_symbol = {}
        for item in self.data:
            by_symbol.setdefault(item['symbol'], []).append(item)
        return {
            symbol: {
                'avg_close': sum(i['close'] for i in items) / len(items),
                'max_high': max(i['high'] for i in items),
                'min_low': min(i['low'] for i in items),
                'total_volume': sum(i['volume'] for i in items),
            }
            for symbol, items in by_symbol.items()
        }


def generate_stock_data():
    """Random OHLCV test data: 5 symbols x 30 days."""
    symbols = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'TSLA']
    data = []
    for symbol in symbols:
        price = random.uniform(100, 1000)
        for days_ago in range(30):
            date = datetime.now() - timedelta(days=days_ago)
            change = random.uniform(-0.05, 0.05)
            open_price = price * (1 + change)
            high = open_price * random.uniform(1.01, 1.05)
            low = open_price * random.uniform(0.95, 0.99)
            close = random.uniform(low, high)
            volume = random.randint(500000, 5000000)
            data.append({
                'symbol': symbol,
                'date': date.strftime('%Y-%m-%d'),
                'open': round(open_price, 2),
                'high': round(high, 2),
                'low': round(low, 2),
                'close': round(close, 2),
                'volume': volume,
            })
            price = close
    return data


def run_stock_challenge():
    """Demo driver for the stock analyzer."""
    print("=== 股票数据分析挑战 ===")
    data = generate_stock_data()
    analyzer = StockAnalyzer(data)

    print("\n最佳表现股票:")
    for stock in analyzer.find_best_performers(3):
        print(f"{stock['symbol']}: {stock['return']:.2f}%")

    print("\n统计信息:")
    stats = analyzer.get_statistics()
    for symbol, stat in stats.items():
        print(f"{symbol}: 平均收盘价 ${stat['avg_close']:.2f}, "
              f"总成交量 {stat['total_volume']:,}")


if __name__ == "__main__":
    run_stock_challenge()
练习5:社交网络分析器
from collections import Counter
import itertools


class SocialNetworkAnalyzer:
    """Social-network analyzer over users, posts and interactions."""

    def __init__(self, users, posts, interactions):
        """
        users: [{'id': 1, 'name': 'Alice', 'age': 25, 'city': 'New York'}, ...]
        posts: [{'id': 101, 'user_id': 1, 'content': '...', 'timestamp': '...'}, ...]
        interactions: [{'user_id': 1, 'post_id': 101, 'type': 'like', 'timestamp': '...'}, ...]
        """
        self.users = users
        self.posts = posts
        self.interactions = interactions

    def get_active_users(self, min_posts=5, min_interactions=10):
        """Users with at least min_posts posts OR min_interactions interactions."""
        # Posts authored per user.
        post_counts = Counter(post['user_id'] for post in self.posts)
        # Interactions performed per user.
        interaction_counts = Counter(
            interaction['user_id'] for interaction in self.interactions
        )
        return [
            {
                **user,
                'posts_count': post_counts.get(user['id'], 0),
                'interactions_count': interaction_counts.get(user['id'], 0),
                # Posts weigh double in the activity score.
                'activity_score': (
                    post_counts.get(user['id'], 0) * 2 +
                    interaction_counts.get(user['id'], 0)
                ),
            }
            for user in self.users
            if (post_counts.get(user['id'], 0) >= min_posts or
                interaction_counts.get(user['id'], 0) >= min_interactions)
        ]

    def find_influencers(self, top_n=5):
        """Top users ranked by interactions received on their posts."""
        # Interactions received per post.
        received_interactions = Counter(
            interaction['post_id'] for interaction in self.interactions
        )
        post_author = {post['id']: post['user_id'] for post in self.posts}
        # Posts authored per user, computed once (the original rebuilt the
        # same filtered list twice for every influencer).
        posts_per_author = Counter(post['user_id'] for post in self.posts)

        influencer_scores = Counter()
        for post_id, interaction_count in received_interactions.items():
            author_id = post_author.get(post_id)
            if author_id:
                influencer_scores[author_id] += interaction_count

        return [
            {
                'user': next(user for user in self.users if user['id'] == user_id),
                'total_interactions': score,
                'avg_interactions_per_post': (
                    score / posts_per_author[user_id]
                    if posts_per_author[user_id] > 0 else 0
                ),
            }
            for user_id, score in influencer_scores.most_common(top_n)
        ]

    def analyze_engagement_by_demographics(self):
        """Per-city engagement statistics."""
        # Group user ids by city.
        city_groups = {}
        for user in self.users:
            city = user.get('city', 'Unknown')
            if city not in city_groups:
                city_groups[city] = []
            city_groups[city].append(user['id'])

        # Count posts / interactions per user once, instead of filtering the
        # full lists repeatedly for every city (the original evaluated the
        # same posts filter three times per city).
        posts_by_user = Counter(post['user_id'] for post in self.posts)
        interactions_per_user = Counter(i['user_id'] for i in self.interactions)

        result = {}
        for city, user_ids in city_groups.items():
            total_posts = sum(posts_by_user[uid] for uid in user_ids)
            result[city] = {
                'user_count': len(user_ids),
                'total_posts': total_posts,
                'total_interactions': sum(
                    interactions_per_user[uid] for uid in user_ids
                ),
                'avg_posts_per_user': total_posts / len(user_ids) if user_ids else 0,
                # Users with at least one post.
                'active_users': len(
                    [uid for uid in user_ids if posts_by_user[uid] > 0]
                ),
            }
        return result

    def detect_communities(self):
        """Detect communities from shared interaction targets."""
        # Map each user to the set of authors whose posts they interacted with.
        interactions_by_user = {}
        for interaction in self.interactions:
            user_id = interaction['user_id']
            if user_id not in interactions_by_user:
                interactions_by_user[user_id] = set()
            # Find the post's author.
            post = next(
                (p for p in self.posts if p['id'] == interaction['post_id']), None
            )
            if post and post['user_id'] != user_id:
                interactions_by_user[user_id].add(post['user_id'])

        # Naive community detection: users who share interaction targets.
        communities = []
        processed = set()
        for user_id, interacted_with in interactions_by_user.items():
            if user_id in processed:
                continue
            # Collect users whose interaction targets overlap this user's.
            community = {user_id}
            for other_id, other_interactions in interactions_by_user.items():
                if other_id != user_id and other_id not in processed:
                    if len(interacted_with & other_interactions) > 0:
                        community.add(other_id)
            processed.update(community)
            if len(community) > 1:
                communities.append({
                    'size': len(community),
                    'members': [
                        next(u for u in self.users if u['id'] == uid)['name']
                        for uid in community
                    ],
                    'common_interests': list(set().union(
                        *[interactions_by_user.get(uid, set()) for uid in community]
                    )),
                })
        return communities

    def generate_content_recommendations(self, user_id, n=5):
        """Recommend up to n posts the user has not interacted with."""
        user_interactions = [
            i for i in self.interactions if i['user_id'] == user_id
        ]
        user_interacted_posts = [
            next(p for p in self.posts if p['id'] == i['post_id'])
            for i in user_interactions
        ]

        # Keyword extraction (simplified: words longer than 3 characters).
        def extract_keywords(content):
            return [word for word in content.split() if len(word) > 3]

        user_keywords = Counter()
        for post in user_interacted_posts:
            user_keywords.update(extract_keywords(post['content']))

        # Candidate posts: not interacted with yet.
        interacted_post_ids = {i['post_id'] for i in user_interactions}
        candidate_posts = [
            p for p in self.posts if p['id'] not in interacted_post_ids
        ]

        # Score candidates by overlap with the user's keyword profile.
        scored_posts = [
            {
                'post': post,
                'score': sum(
                    user_keywords.get(keyword, 0)
                    for keyword in extract_keywords(post['content'])
                ),
            }
            for post in candidate_posts
        ]
        return sorted(scored_posts, key=lambda x: x['score'], reverse=True)[:n]


# Run the challenge
def run_social_network_challenge():
    """Demo: run every analysis on a small fixed dataset."""
    print("=== 社交网络分析挑战 ===")
    # Test data
    users = [
        {'id': 1, 'name': 'Alice', 'age': 25, 'city': 'New York'},
        {'id': 2, 'name': 'Bob', 'age': 30, 'city': 'London'},
        {'id': 3, 'name': 'Charlie', 'age': 28, 'city': 'New York'},
        {'id': 4, 'name': 'Diana', 'age': 32, 'city': 'Tokyo'},
        {'id': 5, 'name': 'Eve', 'age': 26, 'city': 'London'},
    ]
    posts = [
        {'id': 101, 'user_id': 1, 'content': 'Python programming is amazing!', 'timestamp': '2024-01-15 10:00'},
        {'id': 102, 'user_id': 2, 'content': 'Just finished a great book on AI', 'timestamp': '2024-01-15 11:00'},
        {'id': 103, 'user_id': 1, 'content': 'Data science is the future', 'timestamp': '2024-01-15 12:00'},
        {'id': 104, 'user_id': 3, 'content': 'Learning machine learning', 'timestamp': '2024-01-15 13:00'},
        {'id': 105, 'user_id': 4, 'content': 'Beautiful sunset in Tokyo', 'timestamp': '2024-01-15 14:00'},
    ]
    interactions = [
        {'user_id': 2, 'post_id': 101, 'type': 'like', 'timestamp': '2024-01-15 10:30'},
        {'user_id': 3, 'post_id': 101, 'type': 'comment', 'timestamp': '2024-01-15 10:45'},
        {'user_id': 4, 'post_id': 102, 'type': 'like', 'timestamp': '2024-01-15 11:30'},
        {'user_id': 5, 'post_id': 101, 'type': 'share', 'timestamp': '2024-01-15 12:15'},
        {'user_id': 1, 'post_id': 104, 'type': 'like', 'timestamp': '2024-01-15 13:30'},
        {'user_id': 2, 'post_id': 103, 'type': 'like', 'timestamp': '2024-01-15 14:00'},
        {'user_id': 3, 'post_id': 105, 'type': 'comment', 'timestamp': '2024-01-15 14:30'},
    ]
    analyzer = SocialNetworkAnalyzer(users, posts, interactions)

    print("\n活跃用户:")
    for user in analyzer.get_active_users(min_posts=1):
        print(f"{user['name']}: {user['posts_count']} posts, "
              f"{user['interactions_count']} interactions")

    print("\n有影响力的用户:")
    for influencer in analyzer.find_influencers(3):
        print(f"{influencer['user']['name']}: "
              f"{influencer['total_interactions']} total interactions")

    print("\n按城市分析:")
    city_analysis = analyzer.analyze_engagement_by_demographics()
    for city, stats in city_analysis.items():
        print(f"{city}: {stats['user_count']} users, "
              f"{stats['total_posts']} posts, "
              f"{stats['avg_posts_per_user']:.1f} avg posts/user")

    print("\n检测到的社区:")
    for community in analyzer.detect_communities():
        print(f"社区大小: {community['size']}, "
              f"成员: {', '.join(community['members'])}")


if __name__ == "__main__":
    run_social_network_challenge()
6. 高级技巧
class AdvancedComprehensions:
    """Advanced comprehension techniques."""

    @staticmethod
    def conditional_nested_comprehensions():
        """Nested comprehension with a combined condition."""
        matrix = [
            [1, 2, 3, 4],
            [5, 6, 7, 8],
            [9, 10, 11, 12]
        ]
        # All elements greater than 5 that are also even.
        result = [
            num
            for row in matrix
            for num in row
            if num > 5 and num % 2 == 0
        ]
        print(f"条件嵌套结果: {result}")

    @staticmethod
    def multiple_if_conditions():
        """Multiple chained if clauses (implicit AND)."""
        numbers = range(1, 31)
        special_numbers = [
            x for x in numbers
            if x % 2 == 0          # even
            if x % 3 == 0          # divisible by 3
            if x > 10              # greater than 10
            if str(x)[-1] != '0'   # does not end in 0
        ]
        print(f"特殊数字: {special_numbers}")

    @staticmethod
    def using_walrus_operator():
        """Walrus operator (Python 3.8+): bind inside the filter."""
        data = [1, 4, 2, 8, 5, 7, 3, 6]
        # Compute the square once and filter on it in the same expression.
        processed = [
            squared for x in data if (squared := x ** 2) > 10
        ]
        print(f"使用海象运算符: {processed}")

    @staticmethod
    def async_comprehensions():
        """Async comprehensions (Python 3.6+); the demo run is left disabled."""
        import asyncio

        async def fetch_data(id):
            await asyncio.sleep(0.1)
            return f"data-{id}"

        async def main():
            # Async list comprehension.
            results = [await fetch_data(i) for i in range(5)]
            print(f"异步结果: {results}")
            # Async set comprehension.
            unique_results = {await fetch_data(i % 3) for i in range(5)}
            print(f"异步唯一结果: {unique_results}")

        # asyncio.run(main())

    @staticmethod
    def type_hints_with_comprehensions():
        """Comprehensions with type hints."""
        from typing import List, Dict, Set, Generator
        numbers: List[int] = [x for x in range(10)]
        squares: Dict[int, int] = {x: x**2 for x in numbers}
        unique_values: Set[int] = {x % 3 for x in numbers}
        even_numbers: Generator[int, None, None] = (
            x for x in numbers if x % 2 == 0
        )
        print(f"类型提示示例: {squares}")


# Common interview questions
class InterviewQuestions:
    """Common interview questions."""

    @staticmethod
    def question1_flatten_nested_list():
        """Q1: flatten a nested list."""
        nested = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
        # Approach 1: one comprehension handles a single level of nesting.
        flattened1 = [num for sublist in nested for num in sublist]

        # Approach 2: recursive flattening for arbitrary depth.
        def flatten_deep(nested_list):
            return [
                item
                for sublist in nested_list
                for item in (
                    flatten_deep(sublist) if isinstance(sublist, list) else [sublist]
                )
            ]

        deep_nested = [1, [2, 3], [4, [5, 6]]]
        flattened2 = flatten_deep(deep_nested)
        print(f"展平结果1: {flattened1}")
        print(f"展平结果2: {flattened2}")

    @staticmethod
    def question2_find_common_elements():
        """Q2: common elements of two lists."""
        list1 = [1, 2, 3, 4, 5]
        list2 = [4, 5, 6, 7, 8]
        # Approach 1: set intersection.
        common = list(set(list1) & set(list2))
        # Approach 2: comprehension (kept for comparison; O(n*m)).
        common2 = [x for x in list1 if x in list2]
        print(f"共同元素: {common}")

    @staticmethod
    def question3_remove_duplicates_preserve_order():
        """Q3: de-duplicate while preserving order."""
        data = [3, 1, 2, 1, 3, 4, 2, 5]
        # set.add returns None, so `x in seen or seen.add(x)` is falsy exactly
        # once per distinct element — admitting it and recording it in one step.
        seen = set()
        unique = [x for x in data if not (x in seen or seen.add(x))]
        print(f"去重后(保持顺序): {unique}")

    @staticmethod
    def question4_group_by_key():
        """Q4: group items by key."""
        items = [
            {'category': 'fruit', 'name': 'apple'},
            {'category': 'fruit', 'name': 'banana'},
            {'category': 'vegetable', 'name': 'carrot'},
            {'category': 'fruit', 'name': 'orange'},
            {'category': 'vegetable', 'name': 'broccoli'},
        ]
        # Loop-based grouping.
        grouped = {}
        for item in items:
            category = item['category']
            if category not in grouped:
                grouped[category] = []
            grouped[category].append(item['name'])
        # Dict-comprehension grouping (one pass per distinct category).
        grouped2 = {
            category: [item['name'] for item in items if item['category'] == category]
            for category in {item['category'] for item in items}
        }
        print(f"分组结果: {grouped2}")

    @staticmethod
    def question5_matrix_transpose():
        """Q5: transpose a matrix."""
        matrix = [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9]
        ]
        # zip(*matrix) yields the columns as tuples.
        transpose = [list(row) for row in zip(*matrix)]
        # Manual index-based version.
        transpose2 = [
            [matrix[j][i] for j in range(len(matrix))]
            for i in range(len(matrix[0]))
        ]
        print(f"转置结果: {transpose}")

    @staticmethod
    def question6_palindrome_checker():
        """Q6: palindrome checker."""
        words = ['radar', 'hello', 'level', 'world', 'madam']
        # A word is a palindrome iff it equals its reverse.
        palindromes = [word for word in words if word == word[::-1]]
        print(f"回文单词: {palindromes}")

    @staticmethod
    def question7_prime_number_generator():
        """Q7: primes below n."""
        n = 50
        # NOTE(review): this is trial division up to sqrt(x), not the Sieve
        # of Eratosthenes the original comment claimed.
        primes = [
            x for x in range(2, n)
            if all(x % y != 0 for y in range(2, int(x**0.5) + 1))
        ]
        print(f"1-{n}的素数: {primes}")

    @staticmethod
    def question8_anagram_finder():
        """Q8: group anagrams."""
        words = ['listen', 'silent', 'hello', 'world', 'night', 'thing']

        def is_anagram(word1, word2):
            return sorted(word1) == sorted(word2)

        # Group by the sorted-letter signature.
        anagram_groups = {}
        for word in words:
            sorted_word = ''.join(sorted(word))
            if sorted_word not in anagram_groups:
                anagram_groups[sorted_word] = []
            anagram_groups[sorted_word].append(word)
        # Keep only groups that actually contain anagrams.
        result = {
            key: value for key, value in anagram_groups.items() if len(value) > 1
        }
        print(f"变位词组: {result}")


# Run the interview examples
def run_interview_prep():
    """Demo: run every interview question in order."""
    print("=== 面试准备示例 ===")
    interview = InterviewQuestions()
    interview.question1_flatten_nested_list()
    interview.question2_find_common_elements()
    interview.question3_remove_duplicates_preserve_order()
    interview.question4_group_by_key()
    interview.question5_matrix_transpose()
    interview.question6_palindrome_checker()
    interview.question7_prime_number_generator()
    interview.question8_anagram_finder()


if __name__ == "__main__":
    # Run all examples
    string_exercises()
    numeric_exercises()
    file_exercises()
    run_stock_challenge()
    run_social_network_challenge()
    run_interview_prep()
    # Advanced techniques
    advanced = AdvancedComprehensions()
    advanced.conditional_nested_comprehensions()
    advanced.multiple_if_conditions()
    advanced.using_walrus_operator()
总结
本文深入探讨了 Python 推导式的各个方面:概念与定义、列表/字典/集合/生成器四种推导式、嵌套推导式的求值顺序、实战练习(股票数据分析、社交网络分析),以及高级技巧与面试常见问题。
使用原则:
推导式是 Python 的利器,但不是万能的。在追求代码简洁的同时,不要牺牲可读性和可维护性。