I. What Is Deep Learning?
1. Defining Deep Learning
Deep learning is a subfield of machine learning. Loosely inspired by the layered structure of biological neural networks, it uses many layers of nonlinear transformations to learn hierarchical representations of data. Its core idea is to let the machine learn feature representations automatically instead of relying on hand-engineered features.
```python
# Comparing deep learning with traditional machine learning
import matplotlib.pyplot as plt

# Side-by-side visualization
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Traditional machine learning
axes[0].text(0.5, 0.5,
             'Feature engineering → model training\n\nHand-crafted features\n+ classical algorithms\n= limited expressive power',
             ha='center', va='center', fontsize=12, fontweight='bold')
axes[0].set_title('Traditional machine learning', fontsize=14, fontweight='bold')
axes[0].axis('off')

# Deep learning
axes[1].text(0.5, 0.5,
             'Raw data → deep learning\n\nAutomatic feature learning\n+ multi-layer neural networks\n= strong expressive power',
             ha='center', va='center', fontsize=12, fontweight='bold')
axes[1].set_title('Deep learning', fontsize=14, fontweight='bold')
axes[1].axis('off')

plt.suptitle('Traditional machine learning vs. deep learning', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()
```
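To make "multiple layers of nonlinear transformations" concrete, here is a minimal NumPy sketch. It is not a trainable model, just three randomly initialized layers composed end to end: each layer re-represents the output of the previous one, which is what "hierarchical representation" means mechanically.

```python
import numpy as np

rng = np.random.default_rng(0)

def layer(x, in_dim, out_dim):
    """One affine transformation followed by a ReLU nonlinearity."""
    W = rng.normal(size=(in_dim, out_dim)) * 0.1
    b = np.zeros(out_dim)
    return np.maximum(0, x @ W + b)

x = rng.normal(size=(1, 8))          # one sample with 8 raw features
h1 = layer(x, 8, 16)                 # first-level representation
h2 = layer(h1, 16, 16)               # second-level representation
y = layer(h2, 16, 4)                 # final 4-dimensional representation
print(h1.shape, h2.shape, y.shape)   # (1, 16) (1, 16) (1, 4)
```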
2. Historical Milestones of Deep Learning
```python
# Deep learning timeline
timeline_data = {
    '1943': 'McCulloch & Pitts propose the artificial neuron model',
    '1958': 'Frank Rosenblatt invents the perceptron',
    '1969': 'Minsky & Papert expose the limitations of the perceptron',
    '1986': 'Backpropagation is rediscovered and popularized (Rumelhart, Hinton & Williams)',
    '1998': 'Yann LeCun presents LeNet-5 (a convolutional neural network)',
    '2006': 'Geoffrey Hinton introduces deep belief networks',
    '2012': 'AlexNet wins the ImageNet competition decisively',
    '2014': 'Generative adversarial networks (GANs) are proposed',
    '2015': 'ResNet makes very deep networks trainable',
    '2017': 'The Transformer architecture is introduced',
    '2020': 'GPT-3 demonstrates strong language capabilities',
    '2022': 'Diffusion models drive an image-generation revolution'
}

print("Deep learning milestones:")
for year, event in timeline_data.items():
    print(f"{year}: {event}")
```
II. Neural Network Fundamentals
1. The Artificial Neuron Model
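An artificial neuron computes a weighted sum of its inputs plus a bias, then passes the result through a nonlinear activation function. Using the same names as the class below,

$$z = \mathbf{w}^{\top}\mathbf{x} + b, \qquad a = \sigma(z), \qquad \sigma(z) = \frac{1}{1+e^{-z}}$$

The code implements exactly this computation for a few common choices of activation.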
```python
import numpy as np

class ArtificialNeuron:
    """A basic artificial neuron."""

    def __init__(self, input_size, activation='sigmoid'):
        """
        Initialize the neuron.

        Args:
            input_size: number of input features
            activation: type of activation function
        """
        # Initialize weights and bias
        self.weights = np.random.randn(input_size) * 0.1
        self.bias = np.random.randn() * 0.1
        self.activation_type = activation

    def activate(self, x):
        """Forward pass."""
        # Linear combination
        z = np.dot(x, self.weights) + self.bias
        # Apply the activation function
        if self.activation_type == 'sigmoid':
            return self._sigmoid(z)
        elif self.activation_type == 'relu':
            return self._relu(z)
        elif self.activation_type == 'tanh':
            return self._tanh(z)
        else:
            return z  # linear activation

    def _sigmoid(self, x):
        """Sigmoid activation."""
        return 1 / (1 + np.exp(-x))

    def _relu(self, x):
        """ReLU activation."""
        return np.maximum(0, x)

    def _tanh(self, x):
        """Tanh activation."""
        return np.tanh(x)

    def __call__(self, x):
        """Make the neuron callable."""
        return self.activate(x)

# Usage example
neuron = ArtificialNeuron(3, activation='sigmoid')
inputs = np.array([0.5, -0.2, 0.8])
output = neuron(inputs)
print(f"Neuron output: {output:.4f}")
```
2. Common Activation Functions and Their Properties
```python
import numpy as np
import matplotlib.pyplot as plt

def plot_activation_functions():
    """Plot common activation functions."""
    x = np.linspace(-5, 5, 100)

    # Define the activation functions
    functions = {
        'Sigmoid': lambda x: 1 / (1 + np.exp(-x)),
        'ReLU': lambda x: np.maximum(0, x),
        'Leaky ReLU': lambda x: np.where(x > 0, x, 0.01 * x),
        'Tanh': lambda x: np.tanh(x),
        'Swish': lambda x: x * (1 / (1 + np.exp(-x))),
        'ELU': lambda x: np.where(x > 0, x, np.exp(x) - 1),
        'Softplus': lambda x: np.log(1 + np.exp(x))
    }

    # Create the subplots
    fig, axes = plt.subplots(2, 4, figsize=(15, 8))
    axes = axes.ravel()

    for idx, (name, func) in enumerate(functions.items()):
        ax = axes[idx]
        y = func(x)
        ax.plot(x, y, 'b-', linewidth=2)
        ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
        ax.axvline(x=0, color='k', linestyle='-', alpha=0.3)
        ax.grid(True, alpha=0.3)
        ax.set_title(name, fontsize=12, fontweight='bold')

        # Annotate key properties
        if name == 'Sigmoid':
            ax.text(0, 0.5, 'range: (0, 1)\nsmooth, differentiable\nprone to vanishing gradients',
                    fontsize=9, ha='center')
        elif name == 'ReLU':
            ax.text(0, 2.5, 'sparse activations\ncheap to compute\nprone to "dead neurons"',
                    fontsize=9, ha='center')

    # Hide the unused eighth subplot
    axes[-1].axis('off')

    plt.tight_layout()
    plt.suptitle('Common activation functions', fontsize=16, fontweight='bold', y=1.02)
    plt.show()

# Plot the activation functions
plot_activation_functions()
```
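The "vanishing gradient" caveat in the sigmoid panel is easy to verify numerically. A minimal sketch: the sigmoid's derivative is $\sigma(z)(1-\sigma(z))$, which peaks at 0.25 and decays toward zero for large $|z|$, so gradients flowing through many saturated sigmoid layers shrink geometrically.

```python
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_grad(z):
    # Derivative of the sigmoid: sigma(z) * (1 - sigma(z))
    s = sigmoid(z)
    return s * (1 - s)

for z in [0.0, 2.0, 5.0, 10.0]:
    print(f"z = {z:5.1f}  ->  sigmoid'(z) = {sigmoid_grad(z):.6f}")
# z =   0.0  ->  sigmoid'(z) = 0.250000
# z =  10.0  ->  sigmoid'(z) ~ 0.000045
```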
III. Types of Deep Learning Architectures
1. Feedforward Neural Networks (FNN)
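In a feedforward network, each layer $l$ applies an affine map followed by an activation to the previous layer's output:

$$\mathbf{a}^{(0)} = \mathbf{x}, \qquad \mathbf{z}^{(l)} = \mathbf{a}^{(l-1)} W^{(l)} + \mathbf{b}^{(l)}, \qquad \mathbf{a}^{(l)} = f^{(l)}\!\left(\mathbf{z}^{(l)}\right)$$

This is the recurrence that the `forward` method below implements, with ReLU in the hidden layers and a sigmoid output by default.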
```python
class FeedForwardNeuralNetwork:
    """A simple feedforward neural network."""

    def __init__(self, layer_sizes, activations=None):
        """
        Initialize the network.

        Args:
            layer_sizes: neurons per layer, e.g. [10, 20, 5, 1]
            activations: list of activation names, one per weight layer
        """
        self.layer_sizes = layer_sizes
        self.num_layers = len(layer_sizes) - 1

        # Initialize weights and biases
        self.weights = []
        self.biases = []
        for i in range(self.num_layers):
            # Xavier/Glorot initialization
            scale = np.sqrt(2.0 / (layer_sizes[i] + layer_sizes[i + 1]))
            w = np.random.randn(layer_sizes[i], layer_sizes[i + 1]) * scale
            b = np.zeros(layer_sizes[i + 1])
            self.weights.append(w)
            self.biases.append(b)

        # Default: ReLU for hidden layers, sigmoid for the output layer
        if activations is None:
            activations = ['relu'] * (self.num_layers - 1) + ['sigmoid']
        self.activations = activations

    def forward(self, X):
        """Forward pass."""
        self.activations_history = [X]
        self.z_history = []

        a = X
        for i in range(self.num_layers):
            z = np.dot(a, self.weights[i]) + self.biases[i]
            self.z_history.append(z)

            if self.activations[i] == 'sigmoid':
                a = 1 / (1 + np.exp(-z))
            elif self.activations[i] == 'relu':
                a = np.maximum(0, z)
            elif self.activations[i] == 'tanh':
                a = np.tanh(z)
            else:
                a = z  # linear activation

            self.activations_history.append(a)

        return a

    def predict(self, X):
        """Predict."""
        return self.forward(X)

    def summary(self):
        """Print a summary of the network structure."""
        print("=" * 50)
        print("Network summary")
        print("=" * 50)
        print(f"Layers: {self.num_layers}")
        print(f"Neurons per layer: {self.layer_sizes}")
        print(f"Activations: {self.activations}")

        total_params = 0
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            params = w.size + b.size
            total_params += params
            print(f"Layer {i + 1}: {w.shape[0]} → {w.shape[1]} "
                  f"(weights: {w.shape}, bias: {b.shape}) "
                  f"parameters: {params:,}")

        print(f"Total parameters: {total_params:,}")
        print("=" * 50)

# Build and test the network
nn = FeedForwardNeuralNetwork(
    layer_sizes=[10, 20, 15, 1],
    activations=['relu', 'relu', 'sigmoid']
)
nn.summary()

# Test the forward pass
X_test = np.random.randn(5, 10)  # 5 samples, 10 features
output = nn.predict(X_test)
print(f"\nInput shape: {X_test.shape}")
print(f"Output shape: {output.shape}")
```
2. Convolutional Neural Networks (CNN)
```python
class SimpleCNN:
    """A simplified convolutional neural network."""

    def __init__(self, input_shape=(28, 28, 1)):
        """Initialize the CNN."""
        self.input_shape = input_shape
        self.layers = []

    def add_conv_layer(self, filters=32, kernel_size=3, activation='relu'):
        """Add a convolutional layer."""
        self.layers.append({
            'type': 'conv',
            'filters': filters,
            'kernel_size': kernel_size,
            'activation': activation
        })
        return self

    def add_pooling_layer(self, pool_size=2, stride=2):
        """Add a pooling layer."""
        self.layers.append({
            'type': 'pool',
            'pool_size': pool_size,
            'stride': stride
        })
        return self

    def add_dense_layer(self, units, activation='relu'):
        """Add a fully connected layer."""
        self.layers.append({
            'type': 'dense',
            'units': units,
            'activation': activation
        })
        return self

    def add_flatten_layer(self):
        """Add a flatten layer."""
        self.layers.append({'type': 'flatten'})
        return self

    def forward(self, X):
        """Forward pass (simplified)."""
        output = X
        for layer in self.layers:
            if layer['type'] == 'conv':
                output = self._conv_forward(output, layer)
            elif layer['type'] == 'pool':
                output = self._pool_forward(output, layer)
            elif layer['type'] == 'flatten':
                output = output.reshape(output.shape[0], -1)
            elif layer['type'] == 'dense':
                output = self._dense_forward(output, layer)
        return output

    def _conv_forward(self, X, layer_params):
        """Simplified convolution: only the output shape is modeled.
        A real implementation would compute the actual convolution;
        here the values are random placeholders."""
        batch_size, height, width, channels = X.shape
        filters = layer_params['filters']
        kernel_size = layer_params['kernel_size']
        # 'Valid' convolution with stride 1
        output_height = height - kernel_size + 1
        output_width = width - kernel_size + 1
        return np.random.randn(batch_size, output_height, output_width, filters)

    def _pool_forward(self, X, layer_params):
        """Simplified pooling: shape calculation only."""
        batch_size, height, width, channels = X.shape
        pool_size = layer_params['pool_size']
        output_height = height // pool_size
        output_width = width // pool_size
        return np.random.randn(batch_size, output_height, output_width, channels)

    def _dense_forward(self, X, layer_params):
        """Simplified fully connected layer: placeholder values."""
        units = layer_params['units']
        activation = layer_params['activation']
        # Stand-in for the linear transformation
        output = np.random.randn(X.shape[0], units)
        # Activation function
        if activation == 'relu':
            output = np.maximum(0, output)
        elif activation == 'sigmoid':
            output = 1 / (1 + np.exp(-output))
        return output

    def summary(self):
        """Print a summary of the network structure."""
        print("=" * 60)
        print("CNN summary")
        print("=" * 60)
        print(f"Input shape: {self.input_shape}")

        current_shape = self.input_shape
        for i, layer in enumerate(self.layers):
            layer_type = layer['type'].upper()

            if layer_type == 'CONV':
                filters = layer['filters']
                kernel_size = layer['kernel_size']
                print(f"Layer {i + 1}: conv "
                      f"({current_shape[0]}x{current_shape[1]}x{current_shape[2]}) → "
                      f"filters: {filters}, kernel: {kernel_size}x{kernel_size}")
                # Update the shape ('valid' convolution, stride 1)
                current_shape = (
                    current_shape[0] - kernel_size + 1,
                    current_shape[1] - kernel_size + 1,
                    filters
                )
            elif layer_type == 'POOL':
                pool_size = layer['pool_size']
                print(f"Layer {i + 1}: pool "
                      f"({current_shape[0]}x{current_shape[1]}x{current_shape[2]}) → "
                      f"pool size: {pool_size}x{pool_size}")
                current_shape = (
                    current_shape[0] // pool_size,
                    current_shape[1] // pool_size,
                    current_shape[2]
                )
            elif layer_type == 'FLATTEN':
                flattened_size = int(np.prod(current_shape))
                print(f"Layer {i + 1}: flatten "
                      f"({current_shape[0]}x{current_shape[1]}x{current_shape[2]}) → "
                      f"({flattened_size},)")
                current_shape = (flattened_size,)
            elif layer_type == 'DENSE':
                units = layer['units']
                in_size = current_shape[0] if isinstance(current_shape, tuple) else current_shape
                print(f"Layer {i + 1}: dense ({in_size}) → ({units},)")
                current_shape = (units,)

        print("=" * 60)

# Build and test the CNN
cnn = SimpleCNN(input_shape=(28, 28, 1))
cnn.add_conv_layer(filters=32, kernel_size=3, activation='relu')
cnn.add_pooling_layer(pool_size=2)
cnn.add_conv_layer(filters=64, kernel_size=3, activation='relu')
cnn.add_pooling_layer(pool_size=2)
cnn.add_flatten_layer()
cnn.add_dense_layer(units=128, activation='relu')
cnn.add_dense_layer(units=10, activation='softmax')
cnn.summary()
```
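The summary above assumes a "valid" convolution with stride 1, where the output side length is $H - K + 1$. For reference, the general output-size formula with padding $P$ and stride $S$, which the simplified class does not model, is $\lfloor (H - K + 2P)/S \rfloor + 1$. A quick sketch:

```python
def conv_output_size(h, k, padding=0, stride=1):
    """General convolution output size: floor((H - K + 2P) / S) + 1."""
    return (h - k + 2 * padding) // stride + 1

print(conv_output_size(28, 3))                       # 26: valid conv, as in the summary above
print(conv_output_size(28, 3, padding=1))            # 28: 'same' padding preserves the size
print(conv_output_size(28, 3, padding=1, stride=2))  # 14: stride 2 roughly halves it
```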
3. Recurrent Neural Networks (RNN)
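An RNN processes a sequence one step at a time, carrying a hidden state $h_t$ that summarizes everything seen so far. Using the weight names from the code below:

$$h_t = \tanh\!\left(x_t W_{xh} + h_{t-1} W_{hh} + b_h\right), \qquad y_t = h_t W_{hy} + b_y$$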
```python
class SimpleRNN:
    """A simplified recurrent neural network."""

    def __init__(self, input_size, hidden_size, output_size):
        """Initialize the RNN."""
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Input-to-hidden weights
        self.W_xh = np.random.randn(input_size, hidden_size) * 0.01
        # Hidden-to-hidden (recurrent) weights
        self.W_hh = np.random.randn(hidden_size, hidden_size) * 0.01
        # Hidden-to-output weights
        self.W_hy = np.random.randn(hidden_size, output_size) * 0.01

        # Biases
        self.b_h = np.zeros(hidden_size)
        self.b_y = np.zeros(output_size)

        # Cache for intermediate values
        self.history = {}

    def forward(self, X):
        """
        Forward pass.

        Args:
            X: input sequence of shape (seq_length, batch_size, input_size)

        Returns:
            The output sequence, shape (seq_length, batch_size, output_size).
        """
        seq_length, batch_size, _ = X.shape

        # Initialize the hidden state
        h = np.zeros((batch_size, self.hidden_size))

        # Store intermediate values
        self.history['h_states'] = [h]
        self.history['inputs'] = X

        outputs = []
        for t in range(seq_length):
            # Input at the current time step
            x_t = X[t]

            # Update the hidden state
            h = np.tanh(np.dot(x_t, self.W_xh) + np.dot(h, self.W_hh) + self.b_h)
            self.history['h_states'].append(h)

            # Compute the output
            y_t = np.dot(h, self.W_hy) + self.b_y
            outputs.append(y_t)

        # Stack the outputs of all time steps
        return np.stack(outputs)

    def backward(self, d_outputs):
        """Backward pass (placeholder). A real implementation would run
        backpropagation through time; here we return random stand-in gradients."""
        gradients = {
            'dW_xh': np.random.randn(*self.W_xh.shape) * 0.01,
            'dW_hh': np.random.randn(*self.W_hh.shape) * 0.01,
            'dW_hy': np.random.randn(*self.W_hy.shape) * 0.01,
            'db_h': np.random.randn(*self.b_h.shape) * 0.01,
            'db_y': np.random.randn(*self.b_y.shape) * 0.01
        }
        return gradients

    def summary(self):
        """Print a summary of the network structure."""
        print("=" * 50)
        print("RNN summary")
        print("=" * 50)
        print(f"Input size: {self.input_size}")
        print(f"Hidden size: {self.hidden_size}")
        print(f"Output size: {self.output_size}")
        total = (self.W_xh.size + self.W_hh.size + self.W_hy.size
                 + self.b_h.size + self.b_y.size)
        print(f"Total parameters: {total:,}")
        print("=" * 50)

    def generate_sequence(self, seed, length=20):
        """Generate a sequence (simplified). Note: feeding outputs back in
        only makes sense when output_size == input_size."""
        generated = [seed]
        h = np.zeros((1, self.hidden_size))

        for _ in range(length):
            x = generated[-1]
            h = np.tanh(np.dot(x, self.W_xh) + np.dot(h, self.W_hh) + self.b_h)
            y = np.dot(h, self.W_hy) + self.b_y
            # Add a little randomness
            next_item = y + np.random.randn(*y.shape) * 0.1
            generated.append(next_item)

        return np.stack(generated)

# Build and test the RNN
rnn = SimpleRNN(input_size=10, hidden_size=20, output_size=5)
rnn.summary()

# Test the forward pass
seq_length = 15
batch_size = 8
X_test = np.random.randn(seq_length, batch_size, 10)
output = rnn.forward(X_test)
print(f"\nInput shape: {X_test.shape}")
print(f"Output shape: {output.shape}")
```
IV. Comparing Deep Learning Frameworks
1. The Major Frameworks
```python
import pandas as pd

# Framework comparison table (subjective 1-5 ratings)
frameworks_data = {
    'Framework': ['TensorFlow', 'PyTorch', 'Keras', 'MXNet', 'JAX', 'PaddlePaddle'],
    'Released': [2015, 2016, 2015, 2015, 2018, 2016],
    'Developer': ['Google', 'Facebook', 'François Chollet', 'DMLC/Amazon', 'Google', 'Baidu'],
    'Main languages': ['Python/C++', 'Python/C++', 'Python', 'Python/C++', 'Python', 'Python'],
    'Ease of use': [3, 5, 5, 3, 4, 4],
    'Flexibility': [5, 5, 3, 5, 5, 4],
    'Deployment': [5, 4, 3, 5, 4, 5],
    'Community': [5, 5, 4, 3, 3, 3],
    'Key strengths': [
        'production deployment, mature ecosystem',
        'dynamic graphs, research friendly',
        'concise API, rapid prototyping',
        'distributed training, multi-language',
        'functional style, automatic differentiation',
        'Chinese documentation, strong domestic adoption'
    ]
}
frameworks_df = pd.DataFrame(frameworks_data)

print("Deep learning framework comparison")
print("=" * 100)
print(frameworks_df.to_string(index=False))
print("\n" + "=" * 100)

# Selection guide
print("\nSelection guide:")
print("1. TensorFlow: production deployment, enterprise applications, large ecosystem")
print("2. PyTorch: academic research, rapid prototyping, dynamic computation graphs")
print("3. Keras: beginners, quick start, high-level API")
print("4. MXNet: distributed training, multi-language support")
print("5. JAX: functional programming, numerical computing, automatic differentiation")
print("6. PaddlePaddle: Baidu's framework, rich Chinese documentation, industrial use")
```
2. Installing the Frameworks, with Minimal Examples
```python
def get_framework_setup_guide():
    """Installation and quick-start notes for the major frameworks."""
    guides = {
        'TensorFlow': {
            'Install': 'pip install tensorflow',
            'Import': 'import tensorflow as tf',
            'Minimal example': '''
# Build a simple neural network
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
''',
            'Latest version': '2.x (as of 2023)'
        },
        'PyTorch': {
            'Install': 'pip install torch torchvision',
            'Import': 'import torch',
            'Minimal example': '''
# Build a simple neural network
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(784, 64)
        self.fc2 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
''',
            'Latest version': '2.0+ (as of 2023)'
        },
        'Keras': {
            'Install': 'pip install keras',
            'Import': 'from keras import layers, models',
            'Minimal example': '''
# Build a simple neural network
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(layers.Dense(10, activation='softmax'))
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
''',
            'Latest version': '3.0 (as of 2023)'
        }
    }
    return guides

# Print the guides
guides = get_framework_setup_guide()
for framework, info in guides.items():
    print(f"\n{'=' * 60}")
    print(f"{framework} quick guide")
    print(f"{'=' * 60}")
    for key, value in info.items():
        print(f"{key}: {value}")
```
V. Key Concepts in Deep Learning
1. Loss Functions
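Two of the most common losses, in the notation of the code below: mean squared error for regression and binary cross-entropy for classification,

$$\mathrm{MSE} = \frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2, \qquad \mathrm{BCE} = -\frac{1}{n}\sum_{i=1}^{n}\left[y_i \log \hat{y}_i + (1 - y_i)\log(1 - \hat{y}_i)\right]$$

where $y_i$ is the true value and $\hat{y}_i$ the prediction. The class below implements these and several others.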
```python
import numpy as np
import matplotlib.pyplot as plt

class LossFunctions:
    """Common loss functions."""

    @staticmethod
    def mse(y_true, y_pred):
        """Mean squared error."""
        return np.mean((y_true - y_pred) ** 2)

    @staticmethod
    def mae(y_true, y_pred):
        """Mean absolute error."""
        return np.mean(np.abs(y_true - y_pred))

    @staticmethod
    def binary_crossentropy(y_true, y_pred, epsilon=1e-7):
        """Binary cross-entropy."""
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    @staticmethod
    def categorical_crossentropy(y_true, y_pred, epsilon=1e-7):
        """Categorical cross-entropy."""
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=-1))

    @staticmethod
    def huber_loss(y_true, y_pred, delta=1.0):
        """Huber loss (combines the strengths of MSE and MAE)."""
        error = y_true - y_pred
        abs_error = np.abs(error)
        quadratic = np.minimum(abs_error, delta)
        linear = abs_error - quadratic
        return np.mean(0.5 * quadratic ** 2 + delta * linear)

    @staticmethod
    def contrastive_loss(y_true, y_pred, margin=1.0):
        """Contrastive loss (used in metric learning)."""
        positive_distance = y_true * y_pred ** 2
        negative_distance = (1 - y_true) * np.maximum(margin - y_pred, 0) ** 2
        return np.mean(positive_distance + negative_distance)

    @staticmethod
    def visualize_loss_functions():
        """Plot the loss functions."""
        fig, axes = plt.subplots(2, 3, figsize=(15, 10))
        axes = axes.ravel()

        x = np.linspace(-3, 3, 100)
        y_true = 0  # assume the true value is 0

        # MSE
        y_pred = x
        loss = (y_true - y_pred) ** 2
        axes[0].plot(x, loss, 'b-', linewidth=2)
        axes[0].set_title('MSE (mean squared error)', fontweight='bold')
        axes[0].set_xlabel('prediction')
        axes[0].set_ylabel('loss')
        axes[0].grid(True, alpha=0.3)

        # MAE
        loss = np.abs(y_true - y_pred)
        axes[1].plot(x, loss, 'r-', linewidth=2)
        axes[1].set_title('MAE (mean absolute error)', fontweight='bold')
        axes[1].set_xlabel('prediction')
        axes[1].grid(True, alpha=0.3)

        # Huber loss
        delta = 1.0
        loss = np.where(np.abs(x) <= delta,
                        0.5 * x ** 2,
                        delta * (np.abs(x) - 0.5 * delta))
        axes[2].plot(x, loss, 'g-', linewidth=2)
        axes[2].set_title('Huber loss', fontweight='bold')
        axes[2].set_xlabel('prediction')
        axes[2].grid(True, alpha=0.3)

        # Binary cross-entropy (assume the true label is 1)
        y_true_binary = 1
        y_pred_binary = 1 / (1 + np.exp(-x))  # sigmoid of the logit
        loss = -(y_true_binary * np.log(y_pred_binary)
                 + (1 - y_true_binary) * np.log(1 - y_pred_binary))
        axes[3].plot(x, loss, 'm-', linewidth=2)
        axes[3].set_title('Binary cross-entropy (y_true=1)', fontweight='bold')
        axes[3].set_xlabel('logit (z)')
        axes[3].grid(True, alpha=0.3)

        # Contrastive loss
        distance = np.abs(x)
        margin = 1.0
        loss = distance ** 2  # positive pairs
        axes[4].plot(x, loss, 'c-', linewidth=2, label='positive pair')
        loss = np.maximum(margin - distance, 0) ** 2  # negative pairs
        axes[4].plot(x, loss, 'y-', linewidth=2, label='negative pair')
        axes[4].set_title('Contrastive loss', fontweight='bold')
        axes[4].set_xlabel('distance')
        axes[4].legend()
        axes[4].grid(True, alpha=0.3)

        # Loss selection guide
        axes[5].axis('off')
        axes[5].text(0.5, 0.5,
                     'Choosing a loss function:\n\n'
                     '• MSE: regression, sensitive to outliers\n'
                     '• MAE: regression, robust to outliers\n'
                     '• Huber: combines MSE and MAE\n'
                     '• Binary CE: binary classification\n'
                     '• Categorical CE: multi-class classification\n'
                     '• Contrastive: metric learning, similarity',
                     ha='center', va='center', fontsize=11,
                     bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        plt.suptitle('Common loss functions in deep learning',
                     fontsize=16, fontweight='bold', y=1.02)
        plt.tight_layout()
        plt.show()

# Test the loss functions
loss_funcs = LossFunctions()

y_true = np.array([1, 0, 1, 0])
y_pred = np.array([0.9, 0.2, 0.8, 0.3])

print("Loss values:")
print(f"MSE: {loss_funcs.mse(y_true, y_pred):.4f}")
print(f"MAE: {loss_funcs.mae(y_true, y_pred):.4f}")
print(f"Binary crossentropy: {loss_funcs.binary_crossentropy(y_true, y_pred):.4f}")

# Plot the loss functions
LossFunctions.visualize_loss_functions()
```
2. Optimizers
```python
class OptimizerComparison:
    """Compare optimizers and visualize their behavior."""

    @staticmethod
    def visualize_optimization_path():
        """Visualize the paths different optimizers take."""
        # Test function (Rosenbrock, global minimum at (1, 1))
        def rosenbrock(x, y):
            return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2

        # Build the grid
        x = np.linspace(-2, 2, 100)
        y = np.linspace(-1, 3, 100)
        X, Y = np.meshgrid(x, y)
        Z = rosenbrock(X, Y)

        # Optimizer hyperparameters
        optimizers = {
            'SGD': {'lr': 0.01, 'momentum': 0.0},
            'SGD with Momentum': {'lr': 0.01, 'momentum': 0.9},
            'Adam': {'lr': 0.01, 'beta1': 0.9, 'beta2': 0.999},
            'RMSprop': {'lr': 0.01, 'rho': 0.9},
            'Adagrad': {'lr': 0.1}
        }

        fig, axes = plt.subplots(2, 3, figsize=(15, 10))
        axes = axes.ravel()

        for idx, (name, params) in enumerate(optimizers.items()):
            ax = axes[idx]
            # Contour plot of the objective
            ax.contour(X, Y, Z, levels=np.logspace(-1, 3, 20), alpha=0.5)

            # Starting point and optimizer state
            x_pos, y_pos = -1.5, 2.5
            vx = vy = 0.0                  # momentum buffers
            m1x = m1y = m2x = m2y = 0.0    # Adam moment estimates
            sx = sy = 0.0                  # RMSprop / Adagrad accumulators
            trajectory = [(x_pos, y_pos)]

            # Simulate the optimization
            for step in range(100):
                # Gradient of the Rosenbrock function
                grad_x = -2 * (1 - x_pos) - 400 * x_pos * (y_pos - x_pos ** 2)
                grad_y = 200 * (y_pos - x_pos ** 2)

                # Apply each optimizer's update rule
                if name == 'SGD':
                    x_pos -= params['lr'] * grad_x
                    y_pos -= params['lr'] * grad_y
                elif name == 'SGD with Momentum':
                    vx = params['momentum'] * vx + params['lr'] * grad_x
                    vy = params['momentum'] * vy + params['lr'] * grad_y
                    x_pos -= vx
                    y_pos -= vy
                elif name == 'Adam':
                    m1x = params['beta1'] * m1x + (1 - params['beta1']) * grad_x
                    m1y = params['beta1'] * m1y + (1 - params['beta1']) * grad_y
                    m2x = params['beta2'] * m2x + (1 - params['beta2']) * grad_x ** 2
                    m2y = params['beta2'] * m2y + (1 - params['beta2']) * grad_y ** 2
                    # Bias correction
                    m1x_hat = m1x / (1 - params['beta1'] ** (step + 1))
                    m1y_hat = m1y / (1 - params['beta1'] ** (step + 1))
                    m2x_hat = m2x / (1 - params['beta2'] ** (step + 1))
                    m2y_hat = m2y / (1 - params['beta2'] ** (step + 1))
                    x_pos -= params['lr'] * m1x_hat / (np.sqrt(m2x_hat) + 1e-8)
                    y_pos -= params['lr'] * m1y_hat / (np.sqrt(m2y_hat) + 1e-8)
                elif name == 'RMSprop':
                    sx = params['rho'] * sx + (1 - params['rho']) * grad_x ** 2
                    sy = params['rho'] * sy + (1 - params['rho']) * grad_y ** 2
                    x_pos -= params['lr'] * grad_x / (np.sqrt(sx) + 1e-8)
                    y_pos -= params['lr'] * grad_y / (np.sqrt(sy) + 1e-8)
                elif name == 'Adagrad':
                    sx += grad_x ** 2
                    sy += grad_y ** 2
                    x_pos -= params['lr'] * grad_x / (np.sqrt(sx) + 1e-8)
                    y_pos -= params['lr'] * grad_y / (np.sqrt(sy) + 1e-8)

                trajectory.append((x_pos, y_pos))

            # Plot the trajectory
            trajectory = np.array(trajectory)
            ax.plot(trajectory[:, 0], trajectory[:, 1], 'ro-', linewidth=2, markersize=3)
            ax.plot(trajectory[0, 0], trajectory[0, 1], 'go', markersize=8, label='start')
            ax.plot(trajectory[-1, 0], trajectory[-1, 1], 'bo', markersize=8, label='end')
            ax.set_title(name, fontweight='bold')
            ax.set_xlabel('x')
            ax.set_ylabel('y')
            ax.legend()
            ax.grid(True, alpha=0.3)

        # Optimizer selection guide
        axes[5].axis('off')
        axes[5].text(0.5, 0.5,
                     'Choosing an optimizer:\n\n'
                     '• SGD: simple; slow convergence, may oscillate\n'
                     '• SGD+Momentum: damps oscillation, speeds convergence\n'
                     '• Adam: adaptive learning rates, usually strong\n'
                     '• RMSprop: suits non-stationary objectives\n'
                     '• Adagrad: suits sparse data\n\n'
                     'General recommendation: Adam (good default)',
                     ha='center', va='center', fontsize=11,
                     bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.5))

        plt.suptitle('Optimizer comparison: paths on the Rosenbrock function',
                     fontsize=16, fontweight='bold', y=1.02)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def optimizer_summary():
        """Summarize optimizer properties."""
        optimizers_info = {
            'SGD': {
                'Update rule': 'θ = θ - η·∇J(θ)',
                'Pros': 'simple, strong theoretical footing',
                'Cons': 'slow convergence, prone to oscillation',
                'Use case': 'convex optimization problems'
            },
            'Momentum': {
                'Update rule': 'v = βv + η·∇J(θ); θ = θ - v',
                'Pros': 'faster convergence, less oscillation',
                'Cons': 'β must be tuned',
                'Use case': 'general deep learning training'
            },
            'Adam': {
                'Update rule': 'θ = θ - η·m̂/(√v̂ + ε), with m̂, v̂ bias-corrected gradient moments',
                'Pros': 'adaptive learning rates, usually performs well',
                'Cons': 'slightly higher memory use',
                'Use case': 'deep learning (default recommendation)'
            },
            'RMSprop': {
                'Update rule': 'E[g²] = ρE[g²] + (1-ρ)g²; θ = θ - η·g/√(E[g²]+ε)',
                'Pros': 'adaptive learning rates, suits non-stationary objectives',
                'Cons': 'ρ must be tuned',
                'Use case': 'RNN training'
            },
            'Adagrad': {
                'Update rule': 'G = G + g⊙g; θ = θ - η·g/√(G+ε)',
                'Pros': 'adaptive learning rates, suits sparse data',
                'Cons': 'learning rate decays monotonically',
                'Use case': 'learning from sparse features'
            }
        }

        print("=" * 80)
        print("Deep learning optimizer summary")
        print("=" * 80)
        for name, info in optimizers_info.items():
            print(f"\n{name}:")
            for key, value in info.items():
                print(f"  {key}: {value}")
        print("\n" + "=" * 80)

# Show the optimizer information
OptimizerComparison.optimizer_summary()
OptimizerComparison.visualize_optimization_path()
```
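For reference, the full Adam update that the sketch above applies coordinate-wise. With gradient $g_t$, decay rates $\beta_1, \beta_2$, learning rate $\eta$, and a small $\epsilon$:

$$m_t = \beta_1 m_{t-1} + (1-\beta_1)\,g_t, \qquad v_t = \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2$$

$$\hat{m}_t = \frac{m_t}{1-\beta_1^t}, \qquad \hat{v}_t = \frac{v_t}{1-\beta_2^t}, \qquad \theta_t = \theta_{t-1} - \eta\,\frac{\hat{m}_t}{\sqrt{\hat{v}_t}+\epsilon}$$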
VI. Deep Learning Training Techniques
1. Regularization Techniques
```python
class RegularizationTechniques:
    """Regularization techniques for deep learning."""

    @staticmethod
    def l1_regularization(weights, lambda_l1):
        """L1 penalty (lasso)."""
        return lambda_l1 * np.sum(np.abs(weights))

    @staticmethod
    def l2_regularization(weights, lambda_l2):
        """L2 penalty (ridge)."""
        return lambda_l2 * np.sum(weights ** 2)

    @staticmethod
    def elastic_net(weights, lambda_l1, lambda_l2):
        """Elastic net (combines L1 and L2)."""
        return (lambda_l1 * np.sum(np.abs(weights))
                + lambda_l2 * np.sum(weights ** 2))

    @staticmethod
    def dropout(activations, dropout_rate, training=True):
        """Inverted dropout."""
        if not training:
            return activations
        # Sample the dropout mask
        mask = np.random.binomial(1, 1 - dropout_rate, size=activations.shape)
        # Apply the mask and rescale so the expected activation is unchanged
        return activations * mask / (1 - dropout_rate)

    @staticmethod
    def batch_normalization(x, gamma=1, beta=0, epsilon=1e-5):
        """Batch normalization (simplified)."""
        # Per-feature batch statistics
        mean = np.mean(x, axis=0)
        variance = np.var(x, axis=0)
        # Normalize
        x_norm = (x - mean) / np.sqrt(variance + epsilon)
        # Scale and shift
        return gamma * x_norm + beta

    @staticmethod
    def data_augmentation_examples():
        """Examples of data augmentation techniques."""
        techniques = {
            'Image augmentation': [
                'random rotation (±30 degrees)',
                'random scaling (0.8-1.2x)',
                'random cropping',
                'random horizontal flips',
                'color jitter (brightness, contrast, saturation)',
                'random noise'
            ],
            'Text augmentation': [
                'synonym replacement',
                'random insertion',
                'random swap',
                'random deletion',
                'back-translation (translate out and back)',
                'EDA (Easy Data Augmentation)'
            ],
            'Time-series augmentation': [
                'time warping',
                'window slicing',
                'random scaling',
                'added noise',
                'channel shuffling (for multivariate series)'
            ]
        }

        print("Data augmentation techniques:")
        print("=" * 60)
        for category, methods in techniques.items():
            print(f"\n{category}:")
            for method in methods:
                print(f"  • {method}")
        print("\n" + "=" * 60)

    @staticmethod
    def early_stopping_callback(patience=10, min_delta=0.001):
        """Build an early-stopping callback."""
        class EarlyStopping:
            def __init__(self, patience=patience, min_delta=min_delta):
                self.patience = patience
                self.min_delta = min_delta
                self.best_loss = float('inf')
                self.counter = 0
                self.should_stop = False

            def __call__(self, current_loss):
                if current_loss < self.best_loss - self.min_delta:
                    self.best_loss = current_loss
                    self.counter = 0
                    print(f"Loss improved: {current_loss:.4f}")
                    return False
                else:
                    self.counter += 1
                    print(f"Early-stopping counter: {self.counter}/{self.patience}")
                    if self.counter >= self.patience:
                        self.should_stop = True
                        print("Patience exhausted, stopping training")
                    return self.should_stop

        return EarlyStopping()

# Test the regularization techniques
reg = RegularizationTechniques()

# Test dropout
activations = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
dropout_rate = 0.5
print("Dropout example:")
print(f"Original activations:\n{activations}")
print(f"After dropout (training):\n{reg.dropout(activations, dropout_rate, training=True)}")
print(f"After dropout (inference):\n{reg.dropout(activations, dropout_rate, training=False)}")

# Show the data augmentation techniques
reg.data_augmentation_examples()
```
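The `l1_regularization` and `l2_regularization` helpers above only compute penalty values; during training the penalty acts through the gradient. A minimal sketch (assuming plain gradient descent) of how an L2 penalty $\lambda \lVert w \rVert^2$ becomes "weight decay" in the parameter update:

```python
import numpy as np

def sgd_step_with_l2(w, grad_loss, lr=0.01, lambda_l2=1e-4):
    """One gradient step on (data loss + lambda * ||w||^2).
    The penalty contributes 2 * lambda * w to the gradient,
    which pulls the weights toward zero each step."""
    grad_total = grad_loss + 2 * lambda_l2 * w
    return w - lr * grad_total

w = np.array([0.5, -1.2, 3.0])
grad_loss = np.array([0.1, -0.2, 0.05])  # gradient of the data loss alone
print(sgd_step_with_l2(w, grad_loss))
```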
2. Hyperparameter Tuning
```python
class HyperparameterTuning:
    """Hyperparameter tuning for deep learning."""

    @staticmethod
    def learning_rate_scheduler():
        """Learning-rate scheduling strategies."""
        schedulers = {
            'Constant': {
                'Description': 'a fixed learning rate for the whole run',
                'Use case': 'simple tasks, small datasets',
                'Example': 'lr = 0.001'
            },
            'Step decay': {
                'Description': 'drop the learning rate at fixed epochs',
                'Use case': 'most deep learning tasks',
                'Example': 'if epoch % 30 == 0: lr *= 0.1'
            },
            'Cosine annealing': {
                'Description': 'the learning rate follows a cosine from high to low',
                'Use case': 'tasks that need to escape local optima',
                'Formula': 'lr = lr_min + 0.5*(lr_max-lr_min)*(1+cos(epoch/T_max*π))'
            },
            'Cyclical': {
                'Description': 'the learning rate cycles between a minimum and a maximum',
                'Use case': 'improving generalization',
                'Example': ('cycle = epoch % cycle_length; '
                            'lr = lr_min + 0.5*(lr_max-lr_min)*(1+cos(cycle/cycle_length*π))')
            },
            'Warm restarts': {
                'Description': 'restart the schedule periodically with a decaying peak',
                'Use case': 'complex tasks needing fine-grained tuning',
                'Advantage': 'combines broad exploration with fine convergence'
            }
        }

        print("Learning-rate scheduling strategies:")
        print("=" * 80)
        for name, info in schedulers.items():
            print(f"\n{name}:")
            for key, value in info.items():
                print(f"  {key}: {value}")
        print("\n" + "=" * 80)

    @staticmethod
    def visualize_learning_rates():
        """Plot different learning-rate schedules."""
        epochs = 100

        strategies = {
            'Constant': [0.001] * epochs,
            'Step decay': [],
            'Exponential decay': [],
            'Cosine annealing': [],
            'Cyclical': []
        }

        for epoch in range(epochs):
            # Step decay (divide by 10 at epochs 30 and 60)
            lr = 0.001
            if epoch >= 30:
                lr *= 0.1
            if epoch >= 60:
                lr *= 0.1
            strategies['Step decay'].append(lr)

            # Exponential decay
            strategies['Exponential decay'].append(0.001 * np.exp(-0.05 * epoch))

            # Cosine annealing
            T_max = 50
            lr_min, lr_max = 0.0001, 0.01
            strategies['Cosine annealing'].append(
                lr_min + 0.5 * (lr_max - lr_min) * (1 + np.cos(epoch / T_max * np.pi))
            )

            # Cyclical learning rate
            cycle_length = 20
            cycle = epoch % cycle_length
            strategies['Cyclical'].append(
                0.0001 + 0.5 * (0.01 - 0.0001) * (1 + np.cos(cycle / cycle_length * np.pi))
            )

        plt.figure(figsize=(12, 8))
        for name, lr_sequence in strategies.items():
            plt.plot(lr_sequence, linewidth=2, label=name)

        plt.xlabel('Epoch', fontsize=12)
        plt.ylabel('Learning rate', fontsize=12)
        plt.title('Learning-rate schedules compared', fontsize=16, fontweight='bold')
        plt.legend(fontsize=10)
        plt.grid(True, alpha=0.3)
        plt.yscale('log')  # log scale
        plt.tight_layout()
        plt.show()

    @staticmethod
    def hyperparameter_search_space():
        """A typical hyperparameter search space."""
        search_space = {
            'Learning rate': {
                'Range': [1e-5, 1e-1],
                'Suggested values': [1e-3, 3e-4, 1e-4],
                'Search strategy': 'log-uniform sampling',
                'Note': 'the single most important hyperparameter'
            },
            'Batch size': {
                'Range': [16, 256],
                'Suggested values': [32, 64, 128],
                'Search strategy': 'uniform over powers of 2',
                'Note': 'as large as GPU memory allows'
            },
            'Network depth': {
                'Range': [2, 20],
                'Suggested values': [3, 5, 8, 12],
                'Search strategy': 'uniform sampling',
                'Note': 'scale with task complexity'
            },
            'Dropout rate': {
                'Range': [0.0, 0.5],
                'Suggested values': [0.2, 0.3, 0.5],
                'Search strategy': 'uniform sampling',
                'Note': 'regularization strength against overfitting'
            },
            'Weight decay': {
                'Range': [0.0, 0.1],
                'Suggested values': [1e-4, 1e-5, 0.0],
                'Search strategy': 'log-uniform sampling',
                'Note': 'L2 regularization strength'
            },
            'Optimizer': {
                'Options': ['Adam', 'SGD', 'RMSprop', 'Adagrad'],
                'Suggested value': 'Adam',
                'Search strategy': 'categorical sampling',
                'Note': 'Adam is the usual default'
            }
        }

        print("Deep learning hyperparameter search space:")
        print("=" * 100)
        for param, info in search_space.items():
            print(f"\n{param}:")
            for key, value in info.items():
                print(f"  {key}: {value}")
        print("\n" + "=" * 100)

        print("\nTuning strategy:")
        print("1. Tune the learning rate first, holding everything else fixed")
        print("2. Then tune batch size and network architecture")
        print("3. Tune regularization parameters last")
        print("4. Use Bayesian optimization or random search")
        print("5. Use early stopping to prevent overfitting")

# Show the tuning information
tuning = HyperparameterTuning()
tuning.hyperparameter_search_space()
tuning.learning_rate_scheduler()
tuning.visualize_learning_rates()
```
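To make step 4 of the tuning strategy concrete, here is a minimal random-search sketch. The `train_and_evaluate` function is a hypothetical placeholder (not defined anywhere in this article); replace it with your real training loop. The log-uniform sampling of the learning rate follows the search space above.

```python
import numpy as np

rng = np.random.default_rng(42)

def train_and_evaluate(config):
    """Placeholder scoring function: swap in a real training run.
    This dummy version just favors mid-range learning rates so the demo runs."""
    return 1.0 - abs(np.log10(config['learning_rate']) + 3) / 5

def sample_config():
    """Draw one configuration from the search space above."""
    return {
        'learning_rate': 10 ** rng.uniform(-5, -1),   # log-uniform over [1e-5, 1e-1]
        'batch_size': int(rng.choice([32, 64, 128])),
        'dropout_rate': float(rng.uniform(0.0, 0.5)),
        'optimizer': str(rng.choice(['Adam', 'SGD', 'RMSprop']))
    }

best_config, best_score = None, -np.inf
for trial in range(20):
    config = sample_config()
    score = train_and_evaluate(config)
    if score > best_score:
        best_config, best_score = config, score

print(f"Best validation score: {best_score:.4f}")
print(f"Best configuration: {best_config}")
```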
VII. Hands-On Project: Handwritten Digit Recognition
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

class MNISTDigitRecognition:
    """Hands-on project: handwritten digit recognition."""

    def __init__(self):
        """Initialize."""
        self.X_train = None
        self.y_train = None
        self.X_test = None
        self.y_test = None
        self.model = None

    def load_data(self):
        """Load the MNIST dataset."""
        print("Loading MNIST...")
        # as_frame=False returns NumPy arrays instead of a DataFrame,
        # which the reshaping code below relies on
        mnist = fetch_openml('mnist_784', version=1, parser='auto', as_frame=False)
        X = mnist.data.astype('float32') / 255.0  # normalize to [0, 1]
        y = mnist.target.astype('int')

        # Train/test split
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        print(f"Training set: {self.X_train.shape}")
        print(f"Test set: {self.X_test.shape}")
        print(f"Class counts: {np.bincount(self.y_train)}")
        return self

    def visualize_samples(self, n_samples=10):
        """Visualize a few samples."""
        plt.figure(figsize=(15, 6))
        for i in range(n_samples):
            plt.subplot(2, n_samples // 2, i + 1)
            image = self.X_train[i].reshape(28, 28)
            plt.imshow(image, cmap='gray')
            plt.title(f"Label: {self.y_train[i]}")
            plt.axis('off')
        plt.suptitle('MNIST handwritten digit samples', fontsize=16, fontweight='bold')
        plt.tight_layout()
        plt.show()

    def create_simple_model(self):
        """Create a simple neural network model."""

        class SimpleNN:
            """A simple fully connected network with one hidden layer."""

            def __init__(self, input_size=784, hidden_size=128, output_size=10):
                self.input_size = input_size
                self.hidden_size = hidden_size
                self.output_size = output_size

                # He initialization
                self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2.0 / input_size)
                self.b1 = np.zeros(hidden_size)
                self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2.0 / hidden_size)
                self.b2 = np.zeros(output_size)

                # Cache for the backward pass
                self.cache = {}

            def relu(self, x):
                """ReLU activation."""
                return np.maximum(0, x)

            def softmax(self, x):
                """Numerically stable softmax."""
                exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
                return exp_x / np.sum(exp_x, axis=1, keepdims=True)

            def forward(self, X):
                """Forward pass."""
                # Hidden layer
                z1 = np.dot(X, self.W1) + self.b1
                a1 = self.relu(z1)
                # Output layer
                z2 = np.dot(a1, self.W2) + self.b2
                a2 = self.softmax(z2)
                # Cache intermediate results
                self.cache = {'X': X, 'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}
                return a2

            def backward(self, X, y, learning_rate=0.01):
                """Backward pass and parameter update."""
                m = X.shape[0]
                z1 = self.cache['z1']
                a1 = self.cache['a1']
                a2 = self.cache['a2']

                # One-hot encode the labels
                y_onehot = np.zeros((m, self.output_size))
                y_onehot[np.arange(m), y] = 1

                # Output-layer gradients (softmax + cross-entropy)
                dz2 = a2 - y_onehot
                dW2 = np.dot(a1.T, dz2) / m
                db2 = np.sum(dz2, axis=0) / m

                # Hidden-layer gradients
                da1 = np.dot(dz2, self.W2.T)
                dz1 = da1 * (z1 > 0)  # gradient of ReLU
                dW1 = np.dot(X.T, dz1) / m
                db1 = np.sum(dz1, axis=0) / m

                # Gradient descent update
                self.W2 -= learning_rate * dW2
                self.b2 -= learning_rate * db2
                self.W1 -= learning_rate * dW1
                self.b1 -= learning_rate * db1

            def predict(self, X):
                """Predict class labels."""
                probas = self.forward(X)
                return np.argmax(probas, axis=1)

            def evaluate(self, X, y):
                """Compute accuracy."""
                y_pred = self.predict(X)
                return np.mean(y_pred == y)

            def summary(self):
                """Print model information."""
                print("=" * 50)
                print("Simple neural network")
                print("=" * 50)
                print(f"Input size: {self.input_size}")
                print(f"Hidden size: {self.hidden_size}")
                print(f"Output size: {self.output_size}")
                total_params = (self.W1.size + self.b1.size
                                + self.W2.size + self.b2.size)
                print(f"Total parameters: {total_params:,}")
                print("=" * 50)

        self.model = SimpleNN()
        return self.model

    def train_model(self, epochs=10, batch_size=64, learning_rate=0.01):
        """Train the model with mini-batch gradient descent."""
        n_samples = self.X_train.shape[0]
        n_batches = n_samples // batch_size

        print("Starting training...")
        print(f"Training samples: {n_samples}")
        print(f"Batch size: {batch_size}")
        print(f"Batches per epoch: {n_batches}")
        print(f"Epochs: {epochs}")

        train_losses = []
        train_accuracies = []
        test_accuracies = []

        for epoch in range(epochs):
            epoch_loss = 0
            epoch_accuracy = 0

            # Shuffle the data each epoch
            indices = np.random.permutation(n_samples)
            X_shuffled = self.X_train[indices]
            y_shuffled = self.y_train[indices]

            for batch in range(n_batches):
                # Slice the current mini-batch
                start = batch * batch_size
                end = start + batch_size
                X_batch = X_shuffled[start:end]
                y_batch = y_shuffled[start:end]

                # Forward pass
                y_pred = self.model.forward(X_batch)

                # Cross-entropy loss
                m = X_batch.shape[0]
                y_onehot = np.zeros((m, 10))
                y_onehot[np.arange(m), y_batch] = 1
                loss = -np.mean(np.sum(y_onehot * np.log(y_pred + 1e-8), axis=1))
                epoch_loss += loss

                # Batch accuracy
                batch_pred = np.argmax(y_pred, axis=1)
                epoch_accuracy += np.mean(batch_pred == y_batch)

                # Backward pass and parameter update
                self.model.backward(X_batch, y_batch, learning_rate)

            # Epoch averages
            avg_loss = epoch_loss / n_batches
            avg_accuracy = epoch_accuracy / n_batches

            # Test-set accuracy
            test_acc = self.model.evaluate(self.X_test, self.y_test)

            train_losses.append(avg_loss)
            train_accuracies.append(avg_accuracy)
            test_accuracies.append(test_acc)

            print(f"Epoch {epoch + 1}/{epochs}: "
                  f"train loss={avg_loss:.4f}, "
                  f"train acc={avg_accuracy:.4f}, "
                  f"test acc={test_acc:.4f}")

        # Plot the training history
        self.plot_training_history(train_losses, train_accuracies, test_accuracies)
        return train_losses, train_accuracies, test_accuracies

    def plot_training_history(self, train_losses, train_accuracies, test_accuracies):
        """Plot the training history."""
        fig, axes = plt.subplots(1, 2, figsize=(12, 4))

        # Loss curve
        axes[0].plot(train_losses, 'b-', linewidth=2, label='train loss')
        axes[0].set_xlabel('Epoch')
        axes[0].set_ylabel('Loss')
        axes[0].set_title('Training loss')
        axes[0].legend()
        axes[0].grid(True, alpha=0.3)

        # Accuracy curves
        axes[1].plot(train_accuracies, 'g-', linewidth=2, label='train accuracy')
        axes[1].plot(test_accuracies, 'r-', linewidth=2, label='test accuracy')
        axes[1].set_xlabel('Epoch')
        axes[1].set_ylabel('Accuracy')
        axes[1].set_title('Accuracy')
        axes[1].legend()
        axes[1].grid(True, alpha=0.3)

        plt.suptitle('MNIST training history', fontsize=14, fontweight='bold')
        plt.tight_layout()
        plt.show()

    def show_predictions(self, n_samples=15):
        """Show predictions on random test samples."""
        indices = np.random.choice(len(self.X_test), n_samples, replace=False)
        X_sample = self.X_test[indices]
        y_sample = self.y_test[indices]

        # Predict
        y_pred = self.model.predict(X_sample)

        plt.figure(figsize=(15, 10))
        n_cols = 5
        n_rows = int(np.ceil(n_samples / n_cols))

        for i in range(n_samples):
            plt.subplot(n_rows, n_cols, i + 1)
            image = X_sample[i].reshape(28, 28)
            plt.imshow(image, cmap='gray')

            # Green for correct predictions, red for mistakes
            color = 'green' if y_pred[i] == y_sample[i] else 'red'
            plt.title(f"True: {y_sample[i]}\nPredicted: {y_pred[i]}", color=color)
            plt.axis('off')

        accuracy = np.mean(y_pred == y_sample)
        plt.suptitle(f'Predictions (accuracy: {accuracy:.2%})',
                     fontsize=16, fontweight='bold')
        plt.tight_layout()
        plt.show()

        # Summary statistics
        print("\nPrediction statistics:")
        print(f"Samples: {n_samples}")
        print(f"Correct: {np.sum(y_pred == y_sample)}")
        print(f"Wrong: {np.sum(y_pred != y_sample)}")
        print(f"Accuracy: {accuracy:.2%}")

# Run the MNIST digit recognition project
mnist_project = MNISTDigitRecognition()

# Load the data
mnist_project.load_data()

# Visualize a few samples
mnist_project.visualize_samples(10)

# Build the model
model = mnist_project.create_simple_model()
model.summary()

# Train the model
train_losses, train_accuracies, test_accuracies = mnist_project.train_model(
    epochs=20, batch_size=128, learning_rate=0.01
)

# Show predictions
mnist_project.show_predictions(15)
```
Deep learning is changing the world. From image recognition to natural language processing, from autonomous driving to medical diagnosis, its applications are everywhere. The technology can look daunting, but with systematic study and hands-on practice, you too can master it.