A Deep Dive into the llama-factory Source Code: DPO as an Example
Since the code provided in the original post is already fairly complete, we can use the DPO class directly as our example and walk through its key parts.
import torch
import torch.nn as nn


class DPO(nn.Module):
    """Decentralized Q-function for DQN agents.

    The DPO module is a feedforward neural network that maps a state and an
    action to a Q-value.

    Attributes:
        state_dim (int): Dimension of states.
        action_dim (int): Dimension of actions.
        hidden_dim (int): Dimension of the hidden layers in the network.
        num_hidden_layers (int): Number of hidden layers in the network.
        use_batch_norm (bool): Whether to use batch normalization.
        q_func_type (str): Type of the Q-function ('mean' or 'qr').
        activation (torch.nn.Module): Activation function.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=256, num_hidden_layers=2,
                 use_batch_norm=True, q_func_type='mean', activation=torch.nn.ReLU):
        super(DPO, self).__init__()
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.hidden_dim = hidden_dim
        self.num_hidden_layers = num_hidden_layers
        self.use_batch_norm = use_batch_norm
        self.q_func_type = q_func_type
        # Select the concrete Q-function implementation based on q_func_type.
        if self.q_func_type == 'mean':
            self.q_func = MeanQFunction(state_dim=state_dim,
                                        action_dim=action_dim,
                                        hidden_dim=hidden_dim,
                                        num_hidden_layers=num_hidden_layers,
                                        use_batch_norm=use_batch_norm,
                                        activation=activation)
        elif self.q_func_type == 'qr':
            # Quantile-regression variant of the Q-function.
            self.q_func = QRQFunction(state_dim=state_dim,
                                      action_dim=action_dim,
                                      hidden_dim=hidden_dim,
                                      num_hidden_layers=num_hidden_layers,
                                      use_batch_norm=use_batch_norm,
                                      activation=activation)
        else:
            raise ValueError('Invalid q_func_type: {}'.format(self.q_func_type))

    def forward(self, x, action):
        # Delegate to the selected Q-function. The original listing is truncated
        # at this point, so this body is a plausible completion based on the
        # docstring ("maps a state and an action to a Q-value").
        return self.q_func(x, action)
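
To see how these pieces fit together, here is a minimal usage sketch. The excerpt above does not show MeanQFunction or QRQFunction, so the MeanQFunction below is a hypothetical stand-in (a plain MLP over the concatenated state and action) added only to make the example self-contained; the real implementation may differ.

import torch
import torch.nn as nn

# Hypothetical stand-in for the MeanQFunction referenced above; the actual
# implementation is not part of the excerpt and may differ.
class MeanQFunction(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_dim, num_hidden_layers,
                 use_batch_norm, activation):
        super().__init__()
        layers = []
        in_dim = state_dim + action_dim
        for _ in range(num_hidden_layers):
            layers.append(nn.Linear(in_dim, hidden_dim))
            if use_batch_norm:
                layers.append(nn.BatchNorm1d(hidden_dim))
            layers.append(activation())
            in_dim = hidden_dim
        layers.append(nn.Linear(in_dim, 1))  # one scalar Q-value per (state, action)
        self.net = nn.Sequential(*layers)

    def forward(self, x, action):
        return self.net(torch.cat([x, action], dim=-1))

# Build the wrapper and run a forward pass on a dummy batch.
model = DPO(state_dim=8, action_dim=2, hidden_dim=64, num_hidden_layers=2)
model.eval()  # eval mode so BatchNorm1d uses running statistics
state = torch.randn(4, 8)   # batch of 4 states
action = torch.randn(4, 2)  # batch of 4 actions
q_values = model(state, action)
print(q_values.shape)       # torch.Size([4, 1]) with this stand-in Q-function

The constructor acts as a small factory: q_func_type selects between the mean Q-function and the quantile-regression ('qr') variant, and forward simply delegates to whichever implementation was chosen.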