Goal:
Implement gradient descent in Python to approximate a square root, stopping the iteration once the result reaches a specified precision.
Key points:
- Convergence criterion: stop when the change in x between two consecutive iterations is less than 0.001
- Learning rate: must be chosen with care; for this loss it has to shrink as the target grows, and a rate that is too large makes the iteration diverge (see the stability bound derived below)
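Why these update rules work: we recast the square root of a as the minimizer of the loss L(x) = (x² − a)², whose derivative is dL/dx = 4x(x² − a), so gradient descent iterates x ← x − η·4x(x² − a). Near the root x* = √a the curvature is L''(√a) = 12a − 4a = 8a, and gradient descent on a locally quadratic loss is stable only when η < 2/L'' = 1/(4a); this is why √25 needs a step size well below 0.01. As a quick sanity check of the gradient formula, here is a minimal sketch comparing it against a central finite difference (the helper `numeric_gradient` is ours, purely for illustration):

```python
def numeric_gradient(x, a, h=1e-6):
    """Central finite-difference estimate of dL/dx for L(x) = (x^2 - a)^2."""
    return (((x + h)**2 - a)**2 - ((x - h)**2 - a)**2) / (2 * h)

a, x = 25.0, 3.0
analytic = 4 * x * (x**2 - a)             # 4 * 3 * (9 - 25) = -192
print(analytic, numeric_gradient(x, a))   # both close to -192.0
```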
Basic implementation: computing a square root with gradient descent
```python
import numpy as np
import matplotlib.pyplot as plt


def gradient_descent_sqrt(target, learning_rate=0.001, max_iter=10000, tolerance=0.001):
    """
    Approximate a square root with gradient descent.

    Parameters:
        target: the number whose square root we want
        learning_rate: step size (must be small; see the stability note above)
        max_iter: maximum number of iterations
        tolerance: stop when |x_new - x| falls below this value

    Returns:
        (approximate root, iteration count, loss history, x history)
    """
    if target < 0:
        raise ValueError("Cannot take the square root of a negative number")

    # Initial guess (target/2 for target >= 1, otherwise 1.0)
    if target >= 1:
        x = target / 2.0
    else:
        x = 1.0

    loss_history = []
    x_history = []

    for i in range(max_iter):
        # Loss function: L(x) = (x^2 - target)^2
        loss = (x**2 - target)**2
        loss_history.append(loss)
        x_history.append(x)

        # Gradient: dL/dx = 4x(x^2 - target)
        gradient = 4 * x * (x**2 - target)

        # Parameter update: x = x - learning_rate * gradient
        x_new = x - learning_rate * gradient

        # Convergence check
        if abs(x_new - x) < tolerance:
            print(f"Converged after {i+1} iterations")
            x = x_new
            break

        x = x_new

        # Print progress every 1000 iterations
        if i % 1000 == 0 and i > 0:
            print(f"Iteration {i}: x = {x:.6f}, loss = {loss:.6f}")
    else:
        print(f"Reached the maximum of {max_iter} iterations")

    return x, len(loss_history), loss_history, x_history


# Test function
def test_gradient_descent_sqrt():
    """Test gradient descent square root."""
    target = 25  # compute sqrt(25)

    print("=" * 60)
    print(f"Computing sqrt({target}) with gradient descent")
    print("=" * 60)

    # Try several learning rates (larger values overflow for this target;
    # see the stability bound derived above)
    learning_rates = [0.0002, 0.0005, 0.001]

    for lr in learning_rates:
        print(f"\nLearning rate: {lr}")
        print("-" * 40)
        sqrt_approx, iterations, losses, xs = gradient_descent_sqrt(
            target=target, learning_rate=lr, tolerance=0.001
        )
        actual_sqrt = np.sqrt(target)
        error = abs(sqrt_approx - actual_sqrt)
        print(f"Approximation: {sqrt_approx:.6f}")
        print(f"True value:    {actual_sqrt:.6f}")
        print(f"Error:         {error:.6f}")
        print(f"Iterations:    {iterations}")

        # Plot the loss and the parameter trajectory
        plot_results(losses, xs, lr, target)


def plot_results(losses, xs, lr, target):
    """Plot the results."""
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    # Loss curve
    axes[0].plot(losses)
    axes[0].set_xlabel('Iteration')
    axes[0].set_ylabel('Loss')
    axes[0].set_title(f'Loss (lr={lr})')
    axes[0].grid(True)
    axes[0].set_yscale('log')  # log scale

    # Parameter trajectory
    axes[1].plot(xs)
    axes[1].axhline(y=np.sqrt(target), color='r', linestyle='--',
                    label=f'True value: {np.sqrt(target):.4f}')
    axes[1].set_xlabel('Iteration')
    axes[1].set_ylabel('x')
    axes[1].set_title(f'Parameter updates (lr={lr})')
    axes[1].legend()
    axes[1].grid(True)

    # Loss landscape (simplified)
    x_range = np.linspace(0, target, 100)
    loss_curve = (x_range**2 - target)**2
    axes[2].plot(x_range, loss_curve, 'b-', label='Loss function')
    axes[2].plot(xs, [(x**2 - target)**2 for x in xs], 'ro-',
                 markersize=3, alpha=0.5, label='Optimization path')
    axes[2].set_xlabel('x')
    axes[2].set_ylabel('Loss')
    axes[2].set_title(f'Loss landscape (lr={lr})')
    axes[2].legend()
    axes[2].grid(True)

    plt.tight_layout()
    plt.show()


# Run the test
test_gradient_descent_sqrt()
```
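The stability bound also suggests a simple heuristic for choosing the step size: stay a safe factor below 1/(4·target). The helper below is a sketch of that idea (the name `suggest_learning_rate` and the safety factor are our own, not part of the implementation above):

```python
def suggest_learning_rate(target, safety=0.1):
    """Step-size heuristic: a fraction of the stability limit 1/(4*target).

    Near x* = sqrt(target) the curvature of the loss is 8*target, so plain
    gradient descent is stable only below 1/(4*target); the safety factor
    also leaves headroom for the larger gradients far from the root.
    """
    return safety / (4 * max(target, 1.0))

# For sqrt(25) the limit is 0.01, so this suggests lr = 0.001
sqrt_approx, iters, _, _ = gradient_descent_sqrt(25, learning_rate=suggest_learning_rate(25))
print(sqrt_approx, iters)
```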
Improved version: adaptive learning rate with momentum
```python
import numpy as np
import matplotlib.pyplot as plt
from time import time


class AdaptiveGradientDescentSqrt:
    """Gradient descent square root with momentum and learning-rate decay."""

    def __init__(self, learning_rate=0.01, momentum=0.5, decay=0.99):
        """
        Parameters:
            learning_rate: initial learning rate
            momentum: momentum coefficient
            decay: learning-rate decay factor
        """
        self.lr = learning_rate
        self.momentum = momentum
        self.decay = decay
        self.velocity = 0.0  # momentum term

    def compute_gradient(self, x, target):
        """Compute the gradient."""
        return 4 * x * (x**2 - target)

    def compute_loss(self, x, target):
        """Compute the loss."""
        return (x**2 - target)**2

    def optimize(self, target, initial_guess=None, tolerance=0.001, max_iter=10000):
        """Run the optimization."""
        if target < 0:
            raise ValueError("Cannot take the square root of a negative number")

        # Initial guess
        if initial_guess is None:
            x = target / 2.0 if target >= 1 else 1.0
        else:
            x = initial_guess

        self.velocity = 0.0  # reset momentum between calls
        current_lr = self.lr
        loss_history = []
        x_history = []
        gradient_history = []
        lr_history = []
        start_time = time()

        for i in range(max_iter):
            # Current loss
            loss = self.compute_loss(x, target)
            loss_history.append(loss)
            x_history.append(x)

            # Gradient, clipped so that the huge gradients far from the
            # root cannot blow up the first few steps
            grad = self.compute_gradient(x, target)
            grad = max(min(grad, 100.0), -100.0)
            gradient_history.append(abs(grad))

            # Momentum update
            self.velocity = self.momentum * self.velocity - current_lr * grad
            x_new = x + self.velocity

            # Record the learning rate
            lr_history.append(current_lr)

            # Convergence check
            if abs(x_new - x) < tolerance:
                print(f"Converged after {i+1} iterations")
                print(f"Final learning rate: {current_lr:.6f}")
                x = x_new
                break

            # Update the parameter
            x = x_new

            # Learning-rate decay
            current_lr *= self.decay

            # Print progress every 500 iterations
            if i % 500 == 0 and i > 0:
                print(f"Iteration {i}: x = {x:.6f}, loss = {loss:.6f}, "
                      f"gradient = {grad:.6f}, lr = {current_lr:.6f}")
        else:
            print(f"Reached the maximum of {max_iter} iterations")

        elapsed_time = time() - start_time
        print(f"Optimization took {elapsed_time:.4f} s")

        return {
            'x': x,
            'iterations': len(loss_history),
            'loss_history': loss_history,
            'x_history': x_history,
            'gradient_history': gradient_history,
            'lr_history': lr_history,
            'time': elapsed_time,
        }


def compare_methods():
    """Compare different optimization methods."""
    target = 25
    actual_sqrt = np.sqrt(target)

    print("=" * 70)
    print(f"Comparing methods for sqrt({target})")
    print("=" * 70)

    # 1. Plain gradient descent
    print("\n1. Plain gradient descent")
    sqrt_approx, iterations, losses, _ = gradient_descent_sqrt(
        target=target, learning_rate=0.001, tolerance=0.001
    )
    error = abs(sqrt_approx - actual_sqrt)
    print(f"Result: {sqrt_approx:.6f}, error: {error:.6f}, iterations: {iterations}")

    # 2. Adaptive gradient descent
    print("\n2. Adaptive gradient descent")
    optimizer = AdaptiveGradientDescentSqrt(
        learning_rate=0.01, momentum=0.5, decay=0.995
    )
    result = optimizer.optimize(target, tolerance=0.001)
    error = abs(result['x'] - actual_sqrt)
    print(f"Result: {result['x']:.6f}, error: {error:.6f}, iterations: {result['iterations']}")

    # 3. SciPy's Newton method (for comparison)
    print("\n3. SciPy Newton method (for comparison)")
    from scipy.optimize import newton

    def f(x):
        return x**2 - target

    def fprime(x):
        return 2 * x

    newton_result = newton(f, target / 2, fprime=fprime, tol=1e-3, maxiter=100)
    error = abs(newton_result - actual_sqrt)
    print(f"Result: {newton_result:.6f}, error: {error:.6f}")

    # Visual comparison
    visualize_comparison([losses, result['loss_history']],
                         ['Plain gradient descent', 'Adaptive gradient descent'])


def visualize_comparison(loss_lists, labels):
    """Visual comparison."""
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    # Loss curves
    for losses, label in zip(loss_lists, labels):
        axes[0].plot(losses[:1000], label=label)  # first 1000 iterations only
    axes[0].set_xlabel('Iteration')
    axes[0].set_ylabel('Loss')
    axes[0].set_title('Loss comparison')
    axes[0].set_yscale('log')
    axes[0].legend()
    axes[0].grid(True)

    # Convergence speed
    convergence_points = []
    for losses, label in zip(loss_lists, labels):
        # First iteration where the loss drops below 1e-3
        for i, loss in enumerate(losses):
            if loss < 1e-3:
                convergence_points.append((i, label))
                break
    for i, label in convergence_points:
        axes[1].bar(label, i, alpha=0.7, label=f'converged at iteration {i}')
    axes[1].set_xlabel('Method')
    axes[1].set_ylabel('Iterations to converge')
    axes[1].set_title('Convergence speed')
    axes[1].legend()
    axes[1].grid(True, axis='y')

    # Learning-rate schedule (adaptive method)
    optimizer = AdaptiveGradientDescentSqrt()
    result = optimizer.optimize(25, tolerance=0.001)
    axes[2].plot(result['lr_history'])
    axes[2].set_xlabel('Iteration')
    axes[2].set_ylabel('Learning rate')
    axes[2].set_title('Adaptive learning-rate schedule')
    axes[2].grid(True)

    plt.tight_layout()
    plt.show()


# Run the comparison
compare_methods()
```
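For context on the comparison above: Newton's method applied to f(x) = x² − a simplifies algebraically to the classical Babylonian iteration x ← (x + a/x)/2, which converges quadratically, whereas gradient descent on the squared loss converges only linearly. A minimal sketch (assuming a > 0):

```python
def babylonian_sqrt(a, x=None, tolerance=0.001, max_iter=100):
    """Newton's method on f(x) = x^2 - a, i.e. x <- (x + a/x) / 2; assumes a > 0."""
    x = a / 2.0 if x is None else x
    for i in range(max_iter):
        x_new = 0.5 * (x + a / x)
        if abs(x_new - x) < tolerance:
            return x_new, i + 1
        x = x_new
    return x, max_iter

print(babylonian_sqrt(25.0))  # reaches ~5.0 in about five iterations
```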
Improved version: batch gradient descent
```python
import numpy as np


def batch_gradient_descent_sqrt(target, learning_rate=0.0005,
                                tolerance=0.001, max_iter=10000):
    """
    Batch gradient descent for square roots: computes the square roots
    of several numbers simultaneously.

    Parameters:
        target: the number(s) to take the square root of (scalar or array)
        learning_rate: learning rate (shared by all elements)
        tolerance: stop when the largest per-element change is below this
        max_iter: maximum number of iterations

    Returns:
        Approximate square root(s)
    """
    # Normalize the input to an array
    if isinstance(target, (int, float)):
        targets = np.array([target], dtype=float)
    else:
        targets = np.asarray(target, dtype=float)
    n_targets = len(targets)

    # Initial guesses
    x = np.where(targets >= 1, targets / 2.0, 1.0)

    for i in range(max_iter):
        # Batch gradient
        gradients = 4 * x * (x**2 - targets)

        # Parameter update
        x_new = x - learning_rate * gradients

        # Convergence check
        max_change = np.max(np.abs(x_new - x))
        if max_change < tolerance:
            print(f"Converged after {i+1} iterations, max change: {max_change:.6f}")
            x = x_new
            break

        x = x_new

        # Print progress every 1000 iterations
        if i % 1000 == 0 and i > 0:
            losses = (x**2 - targets)**2
            avg_loss = np.mean(losses)
            print(f"Iteration {i}: mean loss = {avg_loss:.6f}, max change = {max_change:.6f}")
    else:
        print(f"Reached the maximum of {max_iter} iterations")

    return x[0] if n_targets == 1 else x


def test_batch_gradient_descent():
    """Test batch gradient descent."""
    # Single number
    print("Single number:")
    target = 36
    sqrt_approx = batch_gradient_descent_sqrt(target, learning_rate=0.0005, tolerance=0.001)
    print(f"sqrt({target}) ≈ {sqrt_approx:.6f}, true value = {np.sqrt(target):.6f}, "
          f"error = {abs(sqrt_approx - np.sqrt(target)):.6f}")

    # Several numbers
    print("\nSeveral numbers:")
    targets = [4, 9, 16, 25, 36]
    sqrt_approxs = batch_gradient_descent_sqrt(targets, learning_rate=0.0005, tolerance=0.001)

    print("Results:")
    print(f"{'Target':<10} {'Approx.':<12} {'True':<12} {'Error':<10}")
    print("-" * 50)
    for t, approx in zip(targets, sqrt_approxs):
        actual = np.sqrt(t)
        error = abs(approx - actual)
        print(f"{t:<10} {approx:<12.6f} {actual:<12.6f} {error:<10.6f}")


# Run the test
test_batch_gradient_descent()
```
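Because the update step is written with NumPy array operations, `batch_gradient_descent_sqrt` also accepts a NumPy array directly and updates every element in lockstep, one vectorized step per iteration. For example (values and step size chosen so the shared learning rate is stable for every element):

```python
values = np.array([2.0, 7.0, 10.0])
roots = batch_gradient_descent_sqrt(values, learning_rate=0.002, tolerance=1e-4)
print(roots)  # close to [1.414, 2.646, 3.162]
```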
Visualizing the optimization process
```python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation


def visualize_gradient_descent_animation():
    """Animate the gradient descent process."""
    target = 16
    actual_sqrt = np.sqrt(target)

    # Prepare data
    x_range = np.linspace(0, 8, 400)
    loss_func = (x_range**2 - target)**2
    grad_func = 4 * x_range * (x_range**2 - target)

    # Initialization (lr small enough that the path stays in the plotted range)
    x = 7.0  # initial guess
    learning_rate = 0.001
    iterations = 100

    # History
    x_history = [x]
    loss_history = [(x**2 - target)**2]

    # Simulate gradient descent
    for i in range(iterations):
        gradient = 4 * x * (x**2 - target)
        x = x - learning_rate * gradient
        x_history.append(x)
        loss_history.append((x**2 - target)**2)

    # Create the figure
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))

    # Loss landscape
    axes[0, 0].plot(x_range, loss_func, 'b-', linewidth=2, label='Loss function')
    axes[0, 0].set_xlabel('x')
    axes[0, 0].set_ylabel('Loss')
    axes[0, 0].set_title('Loss landscape')
    axes[0, 0].grid(True)
    axes[0, 0].legend()
    point, = axes[0, 0].plot([], [], 'ro', markersize=8)

    # Gradient function
    axes[0, 1].plot(x_range, grad_func, 'g-', linewidth=2, label='Gradient')
    axes[0, 1].axhline(y=0, color='k', linestyle='--', alpha=0.5)
    axes[0, 1].set_xlabel('x')
    axes[0, 1].set_ylabel('Gradient')
    axes[0, 1].set_title('Gradient function')
    axes[0, 1].grid(True)
    axes[0, 1].legend()
    grad_point, = axes[0, 1].plot([], [], 'ro', markersize=8)

    # Parameter updates (set_data does not autoscale, so fix the limits up front)
    axes[1, 0].set_xlabel('Iteration')
    axes[1, 0].set_ylabel('x')
    axes[1, 0].set_title('Parameter updates')
    axes[1, 0].axhline(y=actual_sqrt, color='r', linestyle='--',
                       label=f'True value: {actual_sqrt:.4f}')
    axes[1, 0].set_xlim(0, len(x_history))
    axes[1, 0].set_ylim(min(x_history) - 0.5, max(x_history) + 0.5)
    axes[1, 0].grid(True)
    axes[1, 0].legend()
    line, = axes[1, 0].plot([], [], 'b-', linewidth=2)
    current_point, = axes[1, 0].plot([], [], 'ro', markersize=6)

    # Loss over time
    axes[1, 1].set_xlabel('Iteration')
    axes[1, 1].set_ylabel('Loss')
    axes[1, 1].set_title('Loss over time')
    axes[1, 1].set_yscale('log')
    axes[1, 1].set_xlim(0, len(loss_history))
    axes[1, 1].set_ylim(max(min(loss_history), 1e-12), max(loss_history) * 10)
    axes[1, 1].grid(True)
    loss_line, = axes[1, 1].plot([], [], 'b-', linewidth=2)
    loss_point, = axes[1, 1].plot([], [], 'ro', markersize=6)

    # Animation update function
    def update(frame):
        idx = frame

        # Loss-landscape marker
        point.set_data([x_history[idx]], [loss_history[idx]])

        # Gradient marker
        current_grad = 4 * x_history[idx] * (x_history[idx]**2 - target)
        grad_point.set_data([x_history[idx]], [current_grad])

        # Parameter trajectory
        line.set_data(range(idx + 1), x_history[:idx + 1])
        current_point.set_data([idx], [x_history[idx]])

        # Loss trajectory
        loss_line.set_data(range(idx + 1), loss_history[:idx + 1])
        loss_point.set_data([idx], [loss_history[idx]])

        # Show the current iteration in the title
        axes[0, 0].set_title(f'Loss landscape (iteration {idx})')

        return point, grad_point, line, current_point, loss_line, loss_point

    # Create the animation (blit=False so the title updates are redrawn)
    anim = FuncAnimation(fig, update, frames=len(x_history),
                         interval=100, blit=False, repeat=True)

    plt.tight_layout()
    plt.show()
    return anim


# Run the visualization (may require an interactive backend)
# anim = visualize_gradient_descent_animation()
```
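To keep the animation instead of only viewing it interactively, a `FuncAnimation` can be written to disk with `Animation.save`. A sketch assuming the Pillow package is installed for GIF output (best done while the figure is still open, i.e. before the window from `plt.show()` is closed):

```python
anim = visualize_gradient_descent_animation()
# Requires the pillow package; writes a GIF at 10 frames per second
anim.save('gradient_descent_sqrt.gif', writer='pillow', fps=10)
```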
Utility functions
```python
import numpy as np
import matplotlib.pyplot as plt


def sqrt_gradient_descent(target, method='standard', **kwargs):
    """
    Wrapper around the gradient descent square-root solvers.

    Parameters:
        target: the number to take the square root of
        method: which solver to use ('standard', 'adaptive', 'batch')
        **kwargs: forwarded solver parameters

    Returns:
        Approximate square root
    """
    if method == 'standard':
        sqrt_approx, _, _, _ = gradient_descent_sqrt(target, **kwargs)
    elif method == 'adaptive':
        optimizer = AdaptiveGradientDescentSqrt(
            learning_rate=kwargs.get('learning_rate', 0.01),
            momentum=kwargs.get('momentum', 0.5),
            decay=kwargs.get('decay', 0.99)
        )
        result = optimizer.optimize(target,
                                    tolerance=kwargs.get('tolerance', 0.001),
                                    max_iter=kwargs.get('max_iter', 10000))
        sqrt_approx = result['x']
    elif method == 'batch':
        sqrt_approx = batch_gradient_descent_sqrt(target, **kwargs)
    else:
        raise ValueError(f"Unknown method: {method}")

    return sqrt_approx


def benchmark_sqrt_methods():
    """Benchmark the methods."""
    import time

    test_numbers = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
    methods = ['standard', 'adaptive']
    results = {}

    for method in methods:
        print(f"\nMethod: {method}")
        print("-" * 40)
        start_time = time.time()
        approximations = []
        errors = []
        for num in test_numbers:
            approx = sqrt_gradient_descent(num, method=method, tolerance=0.001)
            actual = np.sqrt(num)
            error = abs(approx - actual)
            approximations.append(approx)
            errors.append(error)
            print(f"sqrt({num:2d}): {approx:.6f} (error: {error:.6f})")
        elapsed_time = time.time() - start_time
        results[method] = {
            'approximations': approximations,
            'errors': errors,
            'avg_error': np.mean(errors),
            'max_error': np.max(errors),
            'time': elapsed_time,
        }
        print(f"Mean error: {np.mean(errors):.6f}")
        print(f"Max error:  {np.max(errors):.6f}")
        print(f"Total time: {elapsed_time:.4f} s")

    # Comparison plots
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Error comparison
    for method in methods:
        axes[0].plot(test_numbers, results[method]['errors'], 'o-',
                     label=f'{method} (mean: {results[method]["avg_error"]:.6f})')
    axes[0].set_xlabel('Target number')
    axes[0].set_ylabel('Error')
    axes[0].set_title('Error by method')
    axes[0].legend()
    axes[0].grid(True)

    # Time comparison
    times = [results[method]['time'] for method in methods]
    axes[1].bar(methods, times, alpha=0.7)
    axes[1].set_xlabel('Method')
    axes[1].set_ylabel('Time (s)')
    axes[1].set_title('Runtime comparison')
    axes[1].grid(True, axis='y')
    for i, (method, time_val) in enumerate(zip(methods, times)):
        axes[1].text(i, time_val + 0.01, f'{time_val:.3f}s', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()


# Run the benchmark
print("Benchmark")
print("=" * 50)
benchmark_sqrt_methods()
```
Usage examples
```python
# Main program (relies on the functions defined in the sections above)
if __name__ == "__main__":
    print("Gradient descent square-root demo")
    print("=" * 60)

    # Example 1: a single square root
    print("\nExample 1: sqrt(25)")
    sqrt_approx = sqrt_gradient_descent(25, method='adaptive', tolerance=0.001)
    print(f"Approximation: {sqrt_approx:.6f}")
    print(f"True value:    {np.sqrt(25):.6f}")
    print(f"Error:         {abs(sqrt_approx - np.sqrt(25)):.6f}")

    # Example 2: several square roots
    print("\nExample 2: several square roots")
    numbers = [4, 9, 16, 25, 36, 49, 64]
    approximations = []
    for num in numbers:
        approx = sqrt_gradient_descent(num, method='standard',
                                       learning_rate=0.0002, tolerance=0.001)
        approximations.append(approx)

    print(f"{'Number':<8} {'Approx.':<12} {'True':<12} {'Error':<10}")
    print("-" * 50)
    for num, approx in zip(numbers, approximations):
        actual = np.sqrt(num)
        error = abs(approx - actual)
        print(f"{num:<8} {approx:<12.6f} {actual:<12.6f} {error:<10.6f}")

    # Example 3: iteration counts at different tolerances
    print("\nExample 3: iterations needed at different tolerances")
    precisions = [0.1, 0.01, 0.001, 0.0001, 0.00001]
    for prec in precisions:
        sqrt_approx, iterations, _, _ = gradient_descent_sqrt(
            target=25, learning_rate=0.001, tolerance=prec
        )
        error = abs(sqrt_approx - np.sqrt(25))
        print(f"Tolerance: {prec:.6f}, iterations: {iterations:4d}, "
              f"final error: {error:.8f}")
```