filter():数据过滤的"智能筛子"
1. 基础用法:基于条件过滤元素
filter()函数使用指定函数来过滤可迭代对象中的元素,只保留函数返回真值的元素。
# Basic filtering example
numbers = list(range(1, 11))

# Keep only the even numbers
even_numbers = [*filter(lambda n: n % 2 == 0, numbers)]
print(f"偶数: {even_numbers}")  # prints: 偶数: [2, 4, 6, 8, 10]

# Passing None as the function drops every falsy element, not just None
mixed_data = [0, 1, False, True, None, "hello", "", 3.14]
truthy_values = [*filter(None, mixed_data)]
print(f"真值元素: {truthy_values}")  # prints: 真值元素: [1, True, 'hello', 3.14]

# Equivalent generator expressions
# filter(function, iterable) behaves like:
#   (item for item in iterable if function(item))  # when function is not None
#   (item for item in iterable if item)            # when function is None
2. 实际应用:数据清洗和验证
class DataCleaner:
    """Small collection of filter()-based data-cleaning helpers."""

    @staticmethod
    def remove_outliers(data, threshold_func):
        """Return a list of the items in *data* for which *threshold_func* is truthy."""
        kept = filter(threshold_func, data)
        return list(kept)

    @staticmethod
    def validate_emails(email_list):
        """Return the entries of *email_list* that pass a deliberately simple check.

        An entry is kept when it is a str, contains '@', and the text after
        the last '@' contains a '.'.
        """
        def looks_like_email(candidate):
            if not isinstance(candidate, str):
                return False
            if '@' not in candidate:
                return False
            # Only the part after the last '@' is inspected for a dot.
            domain = candidate.rsplit('@', 1)[-1]
            return '.' in domain

        return list(filter(looks_like_email, email_list))

    @staticmethod
    def filter_by_type(data, target_type):
        """Return a list of the items in *data* that are instances of *target_type*."""
        def has_target_type(item):
            return isinstance(item, target_type)

        return list(filter(has_target_type, data))
# Usage example
cleaner = DataCleaner()

# Drop numeric outliers: keep only the values below 50
numbers = [10, 15, 100, 12, 8, 200, 14]
normal_numbers = cleaner.remove_outliers(numbers, lambda value: value < 50)
print(f"正常数值: {normal_numbers}")

# E-mail validation
emails = [
    "user@example.com",
    "invalid",
    "test@domain",
    "admin@site.org",
]
valid_emails = cleaner.validate_emails(emails)
print(f"有效邮箱: {valid_emails}")

# Filtering by type
mixed_data = [1, "hello", 3.14, [1, 2], "world", 42]
strings_only = cleaner.filter_by_type(mixed_data, str)
print(f"仅字符串: {strings_only}")
float():数值转换的"精确天平"
1. 基础用法:创建浮点数
float()函数从数字或字符串创建浮点数,支持多种输入格式。
# Creating from numbers
print(f"从整数创建: {float(42)}")  # 42.0
print(f"从浮点数创建: {float(3.14)}")  # 3.14

# Creating from strings
print(f"带符号字符串: {float('+1.23')}")  # 1.23
# float() ignores surrounding whitespace, including a trailing newline.
# The text is bound to a name first because a backslash escape inside an
# f-string replacement field is a syntax error before Python 3.12.
padded_text = '   -12345\n'
print(f"带空格字符串: {float(padded_text)}")  # -12345.0
print(f"科学计数法: {float('1e-003')}")  # 0.001
print(f"大写科学计数法: {float('+1E6')}")  # 1000000.0
print(f"无穷大: {float('-Infinity')}")  # -inf

# Special values
print(f"NaN: {float('nan')}")  # nan
print(f"无穷大: {float('inf')}")  # inf

# Calling with no argument
print(f"无参数: {float()}")  # 0.0
2. 实际应用:安全数值转换和数据处理
class SafeFloatConverter:
    """Helpers that convert values to float without letting float() errors escape."""

    @staticmethod
    def safe_float_conversion(value, default=0.0):
        """Convert *value* to a float, falling back to *default* on failure.

        Args:
            value: Anything that may be passed to float().
            default: Value returned when the conversion fails.

        Returns:
            float(value), or *default* when float() raises ValueError,
            TypeError or OverflowError.
        """
        try:
            return float(value)
        except (ValueError, TypeError, OverflowError):
            # OverflowError: integers too large for a float, e.g. 10 ** 400.
            return default

    @staticmethod
    def parse_numeric_strings(string_list):
        """Return the floats parsed from *string_list*, skipping unparseable items.

        Args:
            string_list: Iterable of candidate values (typically strings).

        Returns:
            A list of floats, in input order, for the items float() accepts.
        """
        def try_convert(s):
            try:
                return float(s)
            except (ValueError, TypeError, OverflowError):
                return None

        # None marks a failed conversion; 0.0 is a valid result and is kept.
        converted = filter(lambda x: x is not None, map(try_convert, string_list))
        return list(converted)

    @staticmethod
    def validate_float_range(value, min_val=None, max_val=None):
        """Check that *value* converts to a float within the given bounds.

        Args:
            value: Anything that may be passed to float().
            min_val: Inclusive lower bound, or None for no lower bound.
            max_val: Inclusive upper bound, or None for no upper bound.

        Returns:
            True when the conversion succeeds and every supplied bound holds;
            False otherwise. NaN fails any supplied bound.
        """
        try:
            num = float(value)
            # The checks are written as "not >=" / "not <=" so that NaN, for
            # which every ordering comparison is False, is rejected instead
            # of slipping through both tests.
            if min_val is not None and not num >= min_val:
                return False
            if max_val is not None and not num <= max_val:
                return False
            return True
        except (ValueError, TypeError, OverflowError):
            return False
# Usage example
converter = SafeFloatConverter()

# Safe conversion
test_values = ["3.14", "invalid", "99.9", None, "1e2"]
safe_results = list(map(converter.safe_float_conversion, test_values))
print(f"安全转换结果: {safe_results}")

# Parsing numeric strings
numeric_strings = ["123", "45.67", "1e-3", "not_a_number", "-999"]
parsed_numbers = converter.parse_numeric_strings(numeric_strings)
print(f"解析后的数值: {parsed_numbers}")

# Range validation against the closed interval 0-100
values_to_check = [10, "25.5", "1000", "-5", "invalid"]
for val in values_to_check:
    print(f"{val} 在0-100范围内: {converter.validate_float_range(val, min_val=0, max_val=100)}")
format():字符串格式化的"魔术师"
1. 基础用法:值格式化
format()函数将值转换为格式化字符串,支持丰富的格式化选项。
# Basic formatting
grouped_int = format(12345, ',')
two_decimals = format(3.14159, '.2f')
as_percent = format(0.256, '.1%')
print(f"整数格式化: {grouped_int}")  # 12,345
print(f"浮点数格式化: {two_decimals}")  # 3.14
print(f"百分比格式化: {as_percent}")  # 25.6%

# Alignment and padding (quotes in the notes only make the padding visible)
right_aligned = format('hello', '>10')
left_aligned = format('world', '<10')
centered = format('test', '^10')
zero_padded = format(42, '05d')
print(f"右对齐: {right_aligned}")  # '     hello'
print(f"左对齐: {left_aligned}")  # 'world     '
print(f"居中对齐: {centered}")  # '   test   '
print(f"零填充: {zero_padded}")  # 00042

# Integer bases
print(f"十六进制: {format(255, 'x')}")  # ff
print(f"八进制: {format(64, 'o')}")  # 100
print(f"二进制: {format(10, 'b')}")  # 1010

# Comparison with str()
value = 3.1415926
plain_text = str(value)
rounded_text = format(value, '.3f')
print(f"str()版本: {plain_text}")  # 3.1415926
print(f"format()版本: {rounded_text}")  # 3.142
2. 实际应用:自定义格式化和报表生成
class ReportFormatter:
    """format()-based helpers for producing report text."""

    @staticmethod
    def format_currency(amount, currency_symbol='¥'):
        """Format *amount* with thousands separators and two decimals.

        Args:
            amount: Number to format.
            currency_symbol: Text placed directly in front of the number.

        Returns:
            A string such as '¥1,234.56'.
        """
        # The symbol is prepended to the formatted number instead of being
        # embedded in the format spec: a spec like '¥,.2f' is not valid and
        # makes format() raise ValueError.
        return f"{currency_symbol}{format(amount, ',.2f')}"

    @staticmethod
    def format_percentage_data(values):
        """Return each value rendered as a percentage with two decimals."""
        return [format(val, '.2%') for val in values]

    @staticmethod
    def create_aligned_table(data, column_widths):
        """Render rows of cells as fixed-width text lines.

        Args:
            data: Iterable of rows, each an iterable of cells.
            column_widths: Sequence giving the width of each column; it is
                indexed by cell position, so it must be at least as long as
                the longest row.

        Returns:
            One string per row, with cells separated by a single space.
        """
        def render(cell, width):
            # Numbers are right-aligned; everything else is stringified and
            # left-aligned.
            if isinstance(cell, (int, float)):
                return format(cell, f'>{width}')
            return format(str(cell), f'<{width}')

        return [
            ' '.join(render(cell, column_widths[i]) for i, cell in enumerate(row))
            for row in data
        ]

    @staticmethod
    def format_scientific_data(numbers, precision=3):
        """Return each number in scientific notation with *precision* decimals."""
        return [format(num, f'.{precision}e') for num in numbers]
# Usage example
formatter = ReportFormatter()

# Currency formatting
amounts = [1234.56, 7890.12, 45.67, 1000000]
currency_formatted = list(map(formatter.format_currency, amounts))
print(f"货币格式化: {currency_formatted}")

# Percentage data
percentages = [0.1234, 0.5678, 0.9876]
percent_formatted = formatter.format_percentage_data(percentages)
print(f"百分比格式化: {percent_formatted}")

# Table alignment: a header row followed by three data rows
table_data = [
    ["产品", "销量", "增长率"],
    ["手机", 1500, 0.25],
    ["电脑", 800, 0.15],
    ["平板", 1200, 0.30],
]
column_widths = [10, 8, 10]
aligned_table = formatter.create_aligned_table(table_data, column_widths)
print("对齐表格:")
for row in aligned_table:
    print(row)

# Scientific data
scientific_data = [1234567, 0.000123, 987654321]
sci_formatted = formatter.format_scientific_data(scientific_data)
print(f"科学计数法: {sci_formatted}")
frozenset():不可变集合的"保险箱"
1. 基础用法:创建不可变集合
frozenset()函数创建不可变的集合对象,适合作为字典键或集合元素。
# Build from a list (any iterable works)
source_items = [1, 2, 3, 4, 5]
fset1 = frozenset(source_items)
print(f"从列表创建: {fset1}")  # e.g. frozenset({1, 2, 3, 4, 5})

# Build from a string: one element per distinct character
fset2 = frozenset("hello")
print(f"从字符串创建: {fset2}")  # e.g. frozenset({'h', 'e', 'l', 'o'}); display order may vary

# Build from a range
fset3 = frozenset(range(5))
print(f"从范围创建: {fset3}")  # e.g. frozenset({0, 1, 2, 3, 4})

# Empty frozenset
empty_fset = frozenset()
print(f"空集合: {empty_fset}")  # frozenset()

# frozensets are hashable, so they can serve as dictionary keys
first_key = frozenset([1, 2, 3])
second_key = frozenset([4, 5, 6])
dict_with_frozenset = {first_key: "集合1", second_key: "集合2"}
print(f"字典键示例: {dict_with_frozenset}")
2. 实际应用:数据去重和集合运算
class SetOperations:
    """Examples of deduplication and set algebra built around frozenset."""

    @staticmethod
    def create_immutable_lookup_table(data_list):
        """Map each distinct item of *data_list* to a unique index.

        Args:
            data_list: Iterable of hashable items; duplicates are allowed.

        Returns:
            A dict mapping every distinct item to an index in 0..k-1, numbered
            in order of first appearance.
        """
        # dict.fromkeys() deduplicates while keeping first-seen order.
        # Enumerating a frozenset instead would hand out indices in the set's
        # arbitrary iteration order, which for strings can differ from one
        # interpreter run to the next.
        unique_items = dict.fromkeys(data_list)
        return {item: index for index, item in enumerate(unique_items)}

    @staticmethod
    def find_common_elements(*sequences):
        """Return the elements shared by every given sequence.

        Args:
            *sequences: Zero or more iterables of hashable items.

        Returns:
            A frozenset with the common elements; empty when no sequence is
            given.
        """
        if not sequences:
            return frozenset()
        first, *rest = (frozenset(seq) for seq in sequences)
        return first.intersection(*rest)

    @staticmethod
    def create_immutable_config(config_dict):
        """Copy *config_dict*, converting list and set values to frozensets.

        Only the values are frozen: the returned container is a regular dict,
        and values of other types are copied over unchanged.
        """
        return {
            key: frozenset(value) if isinstance(value, (list, set)) else value
            for key, value in config_dict.items()
        }

    @staticmethod
    def set_operations_example():
        """Print union, intersection, difference and symmetric difference of two sets."""
        A = frozenset([1, 2, 3, 4, 5])
        B = frozenset([4, 5, 6, 7, 8])
        print(f"集合A: {A}")
        print(f"集合B: {B}")
        print(f"并集: {A.union(B)}")
        print(f"交集: {A.intersection(B)}")
        print(f"差集(A-B): {A.difference(B)}")
        print(f"对称差集: {A.symmetric_difference(B)}")
# Usage example
operations = SetOperations()

# Build a lookup table from data that contains duplicates
data = ["apple", "banana", "apple", "cherry", "banana"]
lookup_table = operations.create_immutable_lookup_table(data)
print(f"查找表: {lookup_table}")

# Find the elements common to three lists
list1 = [1, 2, 3, 4, 5]
list2 = [3, 4, 5, 6, 7]
list3 = [5, 6, 7, 8, 9]
common = operations.find_common_elements(list1, list2, list3)
print(f"共同元素: {common}")

# Configuration whose list/set values get frozen
config = dict(
    allowed_users=["user1", "user2", "user3"],
    permissions={"read", "write"},
    max_connections=100,
)
frozen_config = operations.create_immutable_config(config)
print(f"不可变配置: {frozen_config}")

# Set-operation demonstration
operations.set_operations_example()
版本变更与兼容性说明
1. 各函数的版本演进
filter()函数
- 在Python 3中返回迭代器(Python 2中返回列表)
float()函数的版本变更
# Python 3.6: underscores may be used to group digits in numeric strings
grouped_text = '1_000_000.5'
print(f"分组数字: {float(grouped_text)}")  # 1000000.5

# Python 3.7: the parameter became positional-only
# float(number=0.0) -> the value must now be passed positionally

# Python 3.8: falls back to __index__() when __float__() is not defined
class CustomNumber:
    """Object that defines only __index__, used to show the float() fallback."""

    def __index__(self):
        return 42


custom_num = CustomNumber()
fallback_value = float(custom_num)
print(f"回退转换: {fallback_value}")  # 42.0
format()函数的版本变更
# Python 3.4: object().__format__(format_spec) raises TypeError whenever
# format_spec is not an empty string
plain_object = object()
try:
    result = plain_object.__format__('s')
    print(f"对象格式化: {result}")
except TypeError as exc:
    print(f"格式化错误: {exc}")
frozenset()函数
总结
通过本文的详细解析,我们深入了解了四个重要的Python内置函数:
- 1. filter() - 数据过滤的智能筛子
- 2. float() - 数值转换的精确天平
- 3. format() - 字符串格式化的魔术师
- 4. frozenset() - 不可变集合的保险箱
关键知识点总结:
- filter(func, iterable):惰性过滤元素,func为None时只保留真值元素
- float(x):从数字或字符串创建浮点数,支持科学计数法
- format(value, spec):灵活格式化值,支持对齐、精度等选项
- frozenset(iterable):创建不可变集合,适合作为字典键
版本兼容性提醒:
- 注意float()在3.7+版本中变为仅限位置参数
- format()在3.4+版本中,当format_spec不是空字符串时,object().__format__(format_spec)会触发TypeError