The content of this notebook is based on the following lectures from http://romain.raveaux.free.fr/ (teaching section): Supervised Machine Learning for structured input/output, Polytech, Tours.
1. Introduction to supervised Machine Learning: a probabilistic introduction (PDF)
2. Connecting local models: the case of chains (PDF slides)
3. Connecting local models: beyond chains and trees (PDF slides)
4. Machine Learning and Graphs: introduction and problems (PDF slides)
5. Graph Neural Networks (PDF slides)
6. Graph Kernels (PDF slides)
7. Appendix: introduction to deep learning (PDF slides)
# Imports
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.animation as animation
%matplotlib inline
from IPython.display import HTML
Briefly, Zachary's Karate Club is a small social network in which a conflict arises between the administrator and the instructor of a karate club. The task is to predict which side of the conflict each member of the club chooses. The graph representation of the network can be seen below. Each node represents a member of the karate club, and an edge between two members indicates that they interact outside the club. The instructor is node 0 and the administrator is node 33.
from networkx import karate_club_graph
G = karate_club_graph()
nx.draw(G, with_labels=True, font_weight='bold')
plt.show()
print('We have %d nodes.' % G.number_of_nodes())
print('We have %d edges.' % G.number_of_edges())
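The default plot labels nodes only by index; as a small optional sketch (assuming the standard networkx numbering, with the Instructor at node 0 and the Administrator at node 33), we can highlight the two protagonists:
# Highlight the Instructor (node 0) and the Administrator (node 33)
pos = nx.spring_layout(G, seed=42)
node_colors = ['red' if v in (0, 33) else 'lightblue' for v in G.nodes()]
nx.draw(G, pos, with_labels=True, node_color=node_colors, font_weight='bold')
plt.show()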
Graph neural networks associate features with nodes and edges for training. For our classification example, we assign each node an input feature as a one-hot vector: node $v_i$'s feature vector is $[0,\dots,1,\dots,0]$, where the $i^{th}$ position is one.
In DGL one would add features for all nodes at once with a feature tensor batched along the first dimension; here we instead store the one-hot feature on each node of the networkx graph. The code below attaches the one-hot feature to every node:
# Assign each node its one-hot feature vector of length 34
eye = np.eye(34)
node_features = {}
for i in range(G.number_of_nodes()):
    node_features[i] = eye[i, :]
nx.set_node_attributes(G, node_features, 'feature')
print(G.number_of_nodes())
print(G.number_of_edges())
# Inspect the first node: a (node id, feature vector) pair
listnodes = list(G.nodes(data='feature'))
n = listnodes[0]
print(n)
print(n[1].shape)
# Split the (node, feature) pairs into two aligned lists
nodelist, nodesfeatures = map(list, zip(*G.nodes(data='feature')))
print(len(nodesfeatures))
nodesfeatures = np.array(nodesfeatures)
print(nodesfeatures.shape)
nodesfeatures = torch.from_numpy(nodesfeatures).float()
# Dense adjacency matrix, with rows/columns ordered like nodelist
adjacencymatrix = np.array(nx.adjacency_matrix(G, nodelist=nodelist).todense())
print(adjacencymatrix.shape)
adjacencymatrix = torch.from_numpy(adjacencymatrix).float()
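As a quick sanity check (an addition, not required for training), the features should form the $34 \times 34$ identity matrix and the adjacency matrix of this undirected graph should be symmetric:
# Sanity checks on the tensors built above
assert torch.equal(nodesfeatures, torch.eye(34))
assert torch.equal(adjacencymatrix, adjacencymatrix.t())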
We use one-hot vectors to initialize the node features. Since this is a semi-supervised setting, only the instructor (node 0) and the club president (node 33) are assigned labels. The implementation is as follows:
labeled_nodes = torch.tensor([0, 33]) # only the instructor and the president nodes are labeled
print(labeled_nodes)
labels = torch.tensor([0, 1]) # their labels are different
print(labels)
First, we have to define a graph convolution layer. Denoting the set of operators by $\mathcal{A}=\{I, A, A^2, A^3\}$ (the identity, the adjacency matrix and its matrix powers), a GNN layer maps the node features $H^{(\ell)}$ to
$$H^{(\ell+1)} = \rho\left(\sum_{B \in \mathcal{A}} B\, H^{(\ell)}\, W_B^{(\ell)}\right),$$
where the $W_B^{(\ell)}$ are learnable weight matrices and $\rho$ is a pointwise non-linearity. In the implementation below, the four propagated blocks $B H^{(\ell)}$ are concatenated and fed through a single linear layer, which is equivalent to the sum above. Note that the $U$ operator is not used.
import torch
import torch.nn as nn

class GraphConvolution(nn.Module):
    """
    Graph convolution layer.

    forward(H, A) computes fc([A[0].H | A[1].H | A[2].H | A[3].H]),
    where H holds the node features and A is the list of operators
    {I, A, A^2, A^3}; the linear layer plays the role of the
    weight matrices W_B.
    """
    def __init__(self, in_features, out_features, bias=True, batchnorm=False):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.bias = bias
        # One linear map over the concatenation of the 4 propagated blocks
        self.fc = nn.Linear(4 * self.in_features, self.out_features, bias=self.bias)
        self.batchnorm = batchnorm

    def forward(self, H, A):
        # Propagate the features with each operator and concatenate:
        # [I.H, A.H, A^2.H, A^3.H], of shape (n_nodes, 4 * in_features)
        res = torch.cat([torch.matmul(op, H) for op in A], dim=1)
        # The linear layer applies the learnable weights (and bias)
        output = self.fc(res)
        return output
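As a quick usage check (not part of the original notebook), we can run one untrained layer on the tensors built earlier; torch.matrix_power provides the matrix powers $A^2$ and $A^3$:
# Shape check: 34 nodes with 34-dim one-hot features -> 5-dim output
layer = GraphConvolution(34, 5)
identity = torch.eye(34)
operators = [identity,
             adjacencymatrix,
             torch.matrix_power(adjacencymatrix, 2),
             torch.matrix_power(adjacencymatrix, 3)]
print(layer(nodesfeatures, operators).shape)  # torch.Size([34, 5])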
# A simple model with 2 graph conv layers.
# Activation functions are ReLUs.
class Net(nn.Module):
    def __init__(self, in_dim, hidden_dim, n_classes):
        super(Net, self).__init__()
        self.layers = nn.ModuleList([
            GraphConvolution(in_dim, hidden_dim),
            GraphConvolution(hidden_dim, n_classes)])

    def forward(self, h, adj):
        # Build the operator set {I, A, A^2, A^3}.
        # Note: these are matrix powers (torch.matrix_power), not elementwise
        # powers; torch.pow(adj, 2) would leave a 0/1 adjacency unchanged.
        identity = torch.eye(h.shape[0])
        adj2 = torch.matrix_power(adj, 2)
        adj3 = torch.matrix_power(adj, 3)
        for conv in self.layers:
            h = F.relu(conv(h, [identity, adj, adj2, adj3]))
        return h
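To see why matrix powers (and not elementwise powers) are needed here: for a binary adjacency matrix, torch.pow leaves the matrix unchanged, while the matrix power counts 2-hop walks between nodes:
# Elementwise power leaves a 0/1 adjacency unchanged...
print(torch.equal(torch.pow(adjacencymatrix, 2), adjacencymatrix))          # True
# ...whereas the matrix power A @ A counts 2-hop walks (e.g. degrees on the diagonal)
print(torch.equal(torch.matrix_power(adjacencymatrix, 2), adjacencymatrix))  # False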
The training loop is exactly the same as for other PyTorch models. We (1) create an optimizer, (2) feed the inputs to the model, (3) compute the loss and (4) use autograd to optimize the model.
Later we will also add a structural regularization term to the loss. Let $Z$ denote the output of the graph neural network; the term is the squared Frobenius norm $$l_{reg}=\lVert Z Z^{T} - A \rVert_{F}^{2},$$ which encourages the output similarity structure $Z Z^{T}$ to match the adjacency matrix $A$.
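As a small illustration of the formula on random outputs (the reusable regularization() helper is defined further below):
# Illustrate l_reg on random 2D outputs Z
Z = torch.randn(34, 2)
l_reg = torch.norm(Z.matmul(Z.t()) - adjacencymatrix, p='fro') ** 2
print(l_reg.item())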
nb_channels = 34
num_class = 2
num_hidden = 5
model = Net(nb_channels, num_hidden, num_class)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
lossfunction = torch.nn.CrossEntropyLoss()
all_logits = []
for epoch in range(30):
    prediction = model(nodesfeatures, adjacencymatrix)
    # we save the prediction for visualization later
    all_logits.append(prediction.detach())
    # we only compute the loss for the labeled nodes
    loss = lossfunction(prediction[labeled_nodes], labels.long())
    # CrossEntropyLoss is equivalent to:
    # logp = F.log_softmax(prediction, 1)
    # loss = F.nll_loss(logp[labeled_nodes], labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print('Epoch %d | Loss: %.4f' % (epoch, loss.item()))
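As an optional check that is not in the original loop, we can compare the final predictions with the historical split, which networkx stores in each node's 'club' attribute; mapping 'Mr. Hi' to class 0 is consistent with the label given to node 0:
# Compare the final predictions with the real club split ('Mr. Hi' vs 'Officer')
true_labels = torch.tensor([0 if G.nodes[v]['club'] == 'Mr. Hi' else 1
                            for v in nodelist])
pred_labels = all_logits[-1].argmax(dim=1)
print('Agreement with the historical split: %.2f'
      % (pred_labels == true_labels).float().mean().item())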
This is a rather toy example, so it does not even have a validation or test set. Instead, since the model produces a two-dimensional output feature for each node, we can visualize it by plotting the output features in 2D space. The following code animates the training process from the initial guess (where the nodes are not classified correctly at all) to the end (where the nodes are linearly separable).
def draw(i):
    cls1color = '#00FFFF'
    cls2color = '#FF00FF'
    pos = {}
    colors = []
    for v in range(34):
        # Use the 2D output of epoch i as the position of node v
        pos[v] = all_logits[i][v].numpy()
        cls = pos[v].argmax()
        colors.append(cls1color if cls else cls2color)
    ax.cla()
    ax.axis('off')
    ax.set_title('Epoch: %d' % i)
    nx.draw_networkx(G, pos, node_color=colors,
                     with_labels=True, node_size=300, ax=ax)
fig = plt.figure(dpi=150)
fig.clf()
ax = fig.subplots()
draw(0) # draw the prediction of the first epoch
plt.close()
ani = animation.FuncAnimation(fig, draw, frames=len(all_logits), interval=200)
HTML(ani.to_html5_video())
def regularization(ypred, adj):
    # l_reg = || Z.Z^T - A ||_F^2  (squared Frobenius norm)
    dif = ypred.matmul(ypred.t()) - adj
    return torch.norm(dif, p='fro') ** 2
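We first train the network with the regularization loss alone, that is, without using the two labeled nodes at all: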
nb_channels = 34
num_class = 2
num_hidden = 2
model = Net(nb_channels, num_hidden, num_class)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
all_logits = []
for epoch in range(100):
    prediction = model(nodesfeatures, adjacencymatrix)
    # we save the prediction for visualization later
    all_logits.append(prediction.detach())
    # unsupervised: only the structural regularization term
    loss = regularization(prediction, adjacencymatrix)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print('Epoch %d | Loss: %.4f' % (epoch, loss.item()))
# Reuse the draw() helper defined above for this animation
fig = plt.figure(dpi=150)
fig.clf()
ax = fig.subplots()
draw(0) # draw the prediction of the first epoch
plt.close()
ani = animation.FuncAnimation(fig, draw, frames=len(all_logits), interval=200)
HTML(ani.to_html5_video())
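Finally, we combine the supervised cross-entropy on the two labeled nodes with the structural regularization, $l = l_0 + \lambda\, l_{reg}$, where the weight $\lambda$ (lambdaa in the code) controls the strength of the regularization: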
nb_channels = 34
num_class = 2
num_hidden = 2
lambdaa = 0.0001
model = Net(nb_channels, num_hidden, num_class)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
lossfunction = torch.nn.CrossEntropyLoss()
all_logits = []
for epoch in range(100):
    prediction = model(nodesfeatures, adjacencymatrix)
    # we save the prediction for visualization later
    all_logits.append(prediction.detach())
    # supervised loss on the two labeled nodes...
    loss0 = lossfunction(prediction[labeled_nodes], labels.long())
    # ...plus the weighted structural regularization
    lossreg = regularization(prediction, adjacencymatrix)
    loss = loss0 + lambdaa * lossreg
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print('Epoch %d | Loss: %.4f' % (epoch, loss.item()))
# Reuse the draw() helper defined above for this animation
fig = plt.figure(dpi=150)
fig.clf()
ax = fig.subplots()
draw(0) # draw the prediction of the first epoch
plt.close()
ani = animation.FuncAnimation(fig, draw, frames=len(all_logits), interval=200)
HTML(ani.to_html5_video())