字符串是 Python 中最常用的数据类型。我们可以使用引号(' 或 ")来创建字符串。

一、字符串的创建
# Ways to create strings.

# Single quotes
s1 = 'Hello'

# Double quotes
s2 = "World"

# Triple quotes (the literal may span several lines)
s3 = '''这是多行字符串'''
s4 = """也可以使用双引号"""

# Raw string: backslashes are kept literally
s5 = r'C:\Users\name'  # the "\n" in "\name" is NOT turned into a newline

# Byte string
s6 = b'Hello'  # bytes type

# Unicode string (the default str type in Python 3)
s7 = 'Hello 世界 🌍'

# Empty strings
empty = ''
empty2 = str()

# Concatenation
s = 'Hello' + ' ' + 'World'
print(s)  # 'Hello World'

# Repetition
s = 'Ha' * 3
print(s)  # 'HaHaHa'
二、字符串索引和切片
# String indexing and slicing.
s = 'Python'

# Forward indexing (starts at 0)
print(s[0])  # 'P'
print(s[1])  # 'y'
print(s[5])  # 'n'

# Negative indexing (counts from the end)
print(s[-1])  # 'n' (last character)
print(s[-2])  # 'o'
print(s[-6])  # 'P'

# Slicing: [start:stop:step]
print(s[0:3])  # 'Pyt'
print(s[:3])   # 'Pyt' (from the beginning)
print(s[3:])   # 'hon' (to the end)
print(s[:])    # 'Python' (the whole string)

# With a step
print(s[::2])   # 'Pto' (every other character)
print(s[1::2])  # 'yhn'

# Reversing
print(s[::-1])  # 'nohtyP'

# Negative slice bounds
print(s[-3:])  # 'hon' (last 3 characters)
print(s[:-3])  # 'Pyt' (everything except the last 3)

# Out-of-range slice bounds do not raise
print(s[10:])   # '' (empty string)
print(s[:100])  # 'Python'
三、字符串方法(50+个)
1. 大小写转换
# Case conversion methods.
s = 'Hello World'

# All upper case
print(s.upper())  # 'HELLO WORLD'

# All lower case
print(s.lower())  # 'hello world'

# First character upper case, the rest lower case
print(s.capitalize())  # 'Hello world'

# First letter of every word upper case
print(s.title())  # 'Hello World'

# Swap upper and lower case
print(s.swapcase())  # 'hELLO wORLD'

# Predicates
print(s.isupper())  # False
print(s.islower())  # False
print(s.istitle())  # True
2. 查找和替换
# Searching and replacing.
s = 'Hello World, Hello Python'

# Find a substring (returns its index)
print(s.find('World'))  # 6
print(s.find('Java'))   # -1 (not found)

# Search from the right
print(s.rfind('Hello'))  # 13

# index() is like find() but raises when the substring is missing
print(s.index('World'))  # 6
# s.index('Java') would raise ValueError

# Count occurrences
print(s.count('Hello'))  # 2
print(s.count('o'))      # 4

# Replace
print(s.replace('Hello', 'Hi'))  # 'Hi World, Hi Python'

# Replace at most N occurrences
print(s.replace('Hello', 'Hi', 1))  # 'Hi World, Hello Python'

# Prefix test
print(s.startswith('Hello'))  # True
print(s.startswith('Hi'))     # False

# Suffix test
print(s.endswith('Python'))  # True
print(s.endswith('Java'))    # False
3. 分割和连接
# Splitting and joining.

# split() - split a string on a separator
s = 'apple,banana,orange'
print(s.split(','))  # ['apple', 'banana', 'orange']

s = 'one two three four'
print(s.split())  # splits on whitespace by default
# ['one', 'two', 'three', 'four']

# Limit the number of splits
s = 'a-b-c-d'
print(s.split('-', 2))  # ['a', 'b', 'c-d']

# rsplit() - split starting from the right
print(s.rsplit('-', 1))  # ['a-b-c', 'd']

# splitlines() - split on line boundaries
s = 'line1\nline2\nline3'
print(s.splitlines())  # ['line1', 'line2', 'line3']

# partition() - split into (head, separator, tail)
s = 'hello-world'
print(s.partition('-'))  # ('hello', '-', 'world')

# join() - concatenate an iterable of strings
words = ['apple', 'banana', 'orange']
print(','.join(words))  # 'apple,banana,orange'
print(' '.join(words))  # 'apple banana orange'

# Numbers must be converted to str before joining
numbers = [1, 2, 3]
print(','.join(map(str, numbers)))  # '1,2,3'
4. 去除空白
# Stripping whitespace and other characters.
s = ' Hello World '

# Strip whitespace from both ends
print(s.strip())  # 'Hello World'

# Strip whitespace from the left
print(s.lstrip())  # 'Hello World '

# Strip whitespace from the right
print(s.rstrip())  # ' Hello World'

# Strip specific characters
s = '***Hello***'
print(s.strip('*'))  # 'Hello'

# The argument is a SET of characters, not a prefix/suffix: every leading
# and trailing character that appears in it is removed.
s = 'www.example.com'
print(s.strip('cmowz.'))  # 'example'

# Strip a trailing newline
s = 'Hello\n'
print(s.rstrip('\n'))  # 'Hello'
5. 对齐和填充
# Alignment and padding.
s = 'Hello'

# Left-justify within a width of 10
print(s.ljust(10))       # 'Hello     '
print(s.ljust(10, '*'))  # 'Hello*****'

# Right-justify
print(s.rjust(10))       # '     Hello'
print(s.rjust(10, '*'))  # '*****Hello'

# Center (the odd padding character goes on the right here)
print(s.center(10))       # '  Hello   '
print(s.center(10, '*'))  # '**Hello***'

# Zero-fill (for numeric strings); a leading sign stays in front
num = '42'
print(num.zfill(5))  # '00042'
num = '-42'
print(num.zfill(5))  # '-0042'
6. 判断类型
# Character-class predicates.

# All alphabetic?
print('Hello'.isalpha())     # True
print('Hello123'.isalpha())  # False

# All digits?
print('123'.isdigit())   # True
print('12.3'.isdigit())  # False

# All alphanumeric?
print('Hello123'.isalnum())   # True
print('Hello 123'.isalnum())  # False

# All whitespace?
print(' '.isspace())    # True
print(' a '.isspace())  # False

# All cased characters lower case?
print('hello'.islower())  # True
print('Hello'.islower())  # False

# All cased characters upper case?
print('HELLO'.isupper())  # True
print('Hello'.isupper())  # False

# Title-cased?
print('Hello World'.istitle())  # True
print('Hello world'.istitle())  # False

# Decimal digits only?
print('123'.isdecimal())  # True
print('½'.isdecimal())    # False

# Numeric (includes other Unicode numeric characters)?
print('123'.isnumeric())  # True
print('½'.isnumeric())    # True
print('Ⅳ'.isnumeric())    # True (Roman numeral)

# Valid identifier?
print('variable'.isidentifier())  # True
print('123abc'.isidentifier())    # False
print('_var'.isidentifier())      # True

# Printable?
print('Hello'.isprintable())    # True
print('Hello\n'.isprintable())  # False

# ASCII only? (str.isascii requires Python 3.7+)
print('Hello'.isascii())  # True
print('你好'.isascii())    # False
7. 编码和解码
# Encoding str -> bytes and decoding bytes -> str.

# Encode to bytes
s = 'Hello 世界'
bytes_utf8 = s.encode('utf-8')
print(bytes_utf8)  # b'Hello \xe4\xb8\x96\xe7\x95\x8c'

bytes_gbk = s.encode('gbk')
print(bytes_gbk)  # b'Hello \xca\xc0\xbd\xe7'

# Decode back to str
s = bytes_utf8.decode('utf-8')
print(s)  # 'Hello 世界'

# Handling undecodable bytes
bytes_data = b'\xff\xfe'
print(bytes_data.decode('utf-8', errors='ignore'))   # invalid bytes are dropped
print(bytes_data.decode('utf-8', errors='replace'))  # invalid bytes become U+FFFD (�)
8. 格式化
# String formatting overview.

# Old-style formatting (%)
name = 'Alice'
age = 25
print('Name: %s, Age: %d' % (name, age))  # 'Name: Alice, Age: 25'

# str.format()
print('Name: {}, Age: {}'.format(name, age))          # 'Name: Alice, Age: 25'
print('Name: {0}, Age: {1}'.format(name, age))        # 'Name: Alice, Age: 25'
print('Name: {n}, Age: {a}'.format(n=name, a=age))    # 'Name: Alice, Age: 25'

# f-string (Python 3.6+, recommended)
print(f'Name: {name}, Age: {age}')  # 'Name: Alice, Age: 25'

# f-string with expressions
x, y = 10, 20
print(f'{x} + {y} = {x + y}')  # '10 + 20 = 30'

# f-string with a format spec
pi = 3.141592653589793
print(f'π ≈ {pi:.2f}')  # 'π ≈ 3.14'

# f-string with a method call
name = 'alice'
print(f'Hello, {name.upper()}!')  # 'Hello, ALICE!'

# f-string with dict lookups (use the other quote style inside the braces)
person = {'name': 'Bob', 'age': 30}
print(f"Name: {person['name']}, Age: {person['age']}")  # 'Name: Bob, Age: 30'

# f-string debugging specifier (Python 3.8+)
x = 10
print(f'{x=}')  # 'x=10'
四、字符串格式化详解
# str.format() in detail.

# 1. Basic formatting
name = 'Alice'
age = 25

# Positional arguments
print('{} is {} years old'.format(name, age))
# 'Alice is 25 years old'

# Indexed arguments
print('{0} is {1} years old'.format(name, age))
# 'Alice is 25 years old'

# Keyword arguments
print('{n} is {a} years old'.format(n=name, a=age))
# 'Alice is 25 years old'

# 2. Integer presentation
num = 42

# Binary
print('{:b}'.format(num))  # '101010'

# Octal
print('{:o}'.format(num))  # '52'

# Hexadecimal
print('{:x}'.format(num))  # '2a'
print('{:X}'.format(num))  # '2A'

# 3. Floating-point presentation
pi = 3.141592653589793

# Fixed number of decimals
print('{:.2f}'.format(pi))  # '3.14'
print('{:.4f}'.format(pi))  # '3.1416'

# Scientific notation
print('{:.2e}'.format(1234567))  # '1.23e+06'
print('{:.2E}'.format(1234567))  # '1.23E+06'

# Percentage
print('{:.2%}'.format(0.85))  # '85.00%'

# 4. Alignment
text = 'Hello'

# Left-aligned
print('{:<10}'.format(text))  # 'Hello     '

# Right-aligned
print('{:>10}'.format(text))  # '     Hello'

# Centered
print('{:^10}'.format(text))  # '  Hello   '

# Custom fill character
print('{:*<10}'.format(text))  # 'Hello*****'
print('{:*>10}'.format(text))  # '*****Hello'
print('{:*^10}'.format(text))  # '**Hello***'

# 5. Thousands separators
num = 1234567890
print('{:,}'.format(num))  # '1,234,567,890'
print('{:_}'.format(num))  # '1_234_567_890'

# 6. Sign handling
print('{:+}'.format(42))   # '+42'
print('{:+}'.format(-42))  # '-42'
print('{: }'.format(42))   # ' 42'

# 7. Combining options: right-align, width 10, thousands separator, 2 decimals
price = 1234.5
print('{:>10,.2f}'.format(price))  # '  1,234.50'
五、字符串常见操作
1. 字符串拼接
# Ways to concatenate strings.

# Method 1: the + operator
s1 = 'Hello'
s2 = 'World'
result = s1 + ' ' + s2
print(result)  # 'Hello World'

# Method 2: join() (recommended, more efficient)
words = ['Hello', 'World']
result = ' '.join(words)
print(result)  # 'Hello World'

# Method 3: f-string
name = 'Alice'
age = 25
result = f'{name} is {age} years old'
print(result)  # 'Alice is 25 years old'

# Method 4: format()
result = '{} is {} years old'.format(name, age)
print(result)  # 'Alice is 25 years old'

# Method 5: % formatting
result = '%s is %d years old' % (name, age)
print(result)  # 'Alice is 25 years old'

# Performance comparison when concatenating many pieces.
# Inefficient (shown on purpose): each += may build a brand-new string.
result = ''
for i in range(1000):
    result += str(i)

# Efficient: build the string once with join().
result = ''.join(str(i) for i in range(1000))
2. 字符串反转
# Ways to reverse a string.
s = 'Hello'

# Method 1: slicing (simplest)
print(s[::-1])  # 'olleH'

# Method 2: reversed() + join()
print(''.join(reversed(s)))  # 'olleH'


# Method 3: recursion
def reverse_recursive(s):
    """Return *s* reversed, built recursively.

    The function recurses once per character, so it is a teaching example
    only: a string longer than the interpreter's recursion limit raises
    RecursionError. Prefer ``s[::-1]`` in real code.
    """
    # Base case: empty and single-character strings are their own reverse.
    if len(s) <= 1:
        return s
    # Reverse the tail, then append the first character.
    return reverse_recursive(s[1:]) + s[0]


print(reverse_recursive(s))  # 'olleH'
3. 字符串去重
# Ways to remove duplicate characters.
s = 'hello'

# Method 1: set + join (does NOT preserve order)
print(''.join(set(s)))  # 'helo' (in arbitrary order)


# Method 2: preserve first-occurrence order
def remove_duplicates(s):
    """Return *s* with repeated characters removed, keeping first occurrences.

    Characters appear in the result in the order they first occur in *s*.
    """
    seen = set()   # characters already emitted (O(1) membership test)
    result = []
    for char in s:
        if char not in seen:
            seen.add(char)
            result.append(char)
    return ''.join(result)


print(remove_duplicates(s))  # 'helo'

# Method 3: dict.fromkeys() (dicts keep insertion order in Python 3.7+)
print(''.join(dict.fromkeys(s)))  # 'helo'
4. 字符串匹配
# Pattern matching with regular expressions.
import re

# Email pattern, compiled once and reused for both findall() and sub().
# The top-level-domain class is [A-Za-z]: inside a character class "|" is a
# literal pipe, not alternation, so [A-Z|a-z] would wrongly accept "|".
email_re = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

s = 'My email is alice@example.com and bob@test.com'

# Find every email address
emails = email_re.findall(s)
print(emails)  # ['alice@example.com', 'bob@test.com']

# Replace
result = email_re.sub('[EMAIL]', s)
print(result)  # 'My email is [EMAIL] and [EMAIL]'

# Split on any of several delimiters (surrounding spaces are kept)
s = 'apple, banana; orange|grape'
result = re.split(r'[,;|]', s)
print(result)  # ['apple', ' banana', ' orange', 'grape']

# Match from the start of the string
pattern = r'^[A-Za-z0-9]+$'
print(re.match(pattern, 'Hello123'))   # a Match object
print(re.match(pattern, 'Hello 123'))  # None
六、字符串实战案例
案例1:验证输入
import re

# Patterns are compiled once at import time and applied with fullmatch(),
# so no ^/$ anchors are needed and a trailing newline is not accepted
# ("$" with re.match would also match just before a final "\n").
_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
# [0-9] rather than \d: in str patterns \d also matches non-ASCII digits.
_PHONE_RE = re.compile(r'1[3-9][0-9]{9}')


def validate_email(email):
    """Return True if *email* looks like ``local@domain.tld``.

    This is a simple syntactic check (letters, digits and a few punctuation
    characters, then ``@``, a domain, and a TLD of two or more letters); it
    does not verify that the address exists.
    """
    return _EMAIL_RE.fullmatch(email) is not None


print(validate_email('alice@example.com'))  # True
print(validate_email('invalid.email'))      # False


def validate_phone(phone):
    """Return True if *phone* is an 11-digit mainland-China mobile number.

    The number must start with ``1``, have ``3``-``9`` as its second digit,
    and consist of exactly 11 ASCII digits.
    """
    return _PHONE_RE.fullmatch(phone) is not None


print(validate_phone('13812345678'))  # True
print(validate_phone('12345678901'))  # False


def validate_password(password):
    """Return True if *password* meets the minimum strength rule.

    The rule: at least 8 characters, containing at least one upper-case
    letter, one lower-case letter and one digit.
    """
    if len(password) < 8:
        return False
    has_upper = any(c.isupper() for c in password)
    has_lower = any(c.islower() for c in password)
    has_digit = any(c.isdigit() for c in password)
    return has_upper and has_lower and has_digit


print(validate_password('Abc12345'))  # True
print(validate_password('abc12345'))  # False (no upper-case letter)
案例2:文本处理
def word_count(text): """统计单词数""" words = text.split() return len(words)def char_count(text, ignore_space=True): """统计字符数""" if ignore_space: text = text.replace(' ', '') return len(text)def most_common_word(text): """找出最常见的单词""" from collections import Counter words = text.lower().split() counter = Counter(words) return counter.most_common(1)[0]text = 'hello world hello python hello'print(word_count(text)) # 5print(char_count(text)) # 25print(most_common_word(text)) # ('hello', 3)
案例3:字符串加密
import string


def caesar_cipher(text, shift=3):
    """Encrypt *text* with a Caesar cipher.

    Each ASCII letter is rotated *shift* positions within its own case
    (A-Z or a-z), wrapping around the alphabet. Every other character,
    including digits, punctuation and non-ASCII letters such as 'é' or
    '中', is left unchanged so the transformation stays reversible.

    Args:
        text: The string to encrypt.
        shift: Number of positions to rotate; may be negative or exceed 26.

    Returns:
        The encrypted string, the same length as *text*.
    """
    shift %= 26  # normalise negative and large shifts into 0..25
    lower = string.ascii_lowercase
    upper = string.ascii_uppercase
    # One translation table for both cases; unmapped characters pass through.
    table = str.maketrans(
        lower + upper,
        lower[shift:] + lower[:shift] + upper[shift:] + upper[:shift],
    )
    return text.translate(table)


def caesar_decipher(text, shift=3):
    """Decrypt *text* that was encrypted by ``caesar_cipher`` with *shift*."""
    return caesar_cipher(text, -shift)


text = 'Hello World'
encrypted = caesar_cipher(text, 3)
print(encrypted)  # 'Khoor Zruog'
decrypted = caesar_decipher(encrypted, 3)
print(decrypted)  # 'Hello World'