# STOP
This repository contains the implementation for the paper "Unraveling Spatial-Temporal and Out-of-Distribution Patterns for Multivariate Time Series Classification", accepted at WWW 2025.
## Requirements
The dependencies can be installed by:
```pip install -r requirements.txt```
## Data
UEA datasets can be downloaded from this [link](http://www.timeseriesclassification.com/). The unzipped files should be placed under `datasets/`, so that the original data is located at `datasets/Multivariate_arff`.
Then run the following command to preprocess the data:
```python preprocess.py```
## Usage
To train and evaluate STOP on a dataset, run the following command:
```python -u train_mul_ood.py --data <dataset_name> --seg_len <segment length> --r1 <r1> --r2 <r2>```
Detailed descriptions of the arguments are as follows:
| Parameter name | Description of parameter |
|----------------|----------------------------------------------------------------------------|
| data | The name of the dataset. |
| seg_len | The segment length. |
| r1 | The ratio of nodes to choose as neighbors in intra-correlation extraction. |
| r2 | The ratio of nodes to choose as neighbors in inter-correlation extraction. |
(For descriptions of all arguments, run `python train_mul_ood.py -h`.)
For example, the Cricket dataset can be trained directly with the following command:
```python -u train_mul_ood.py --data Cricket --seg_len 20```
The results will be saved to ```result.txt```.
## Script
The script for reproducing all results is provided in ```UEA.sh```:
```
python -u train_mul_ood.py --data ArticularyWordRecognition --seg_len 6
python -u train_mul_ood.py --data AtrialFibrillation --seg_len 16
python -u train_mul_ood.py --data BasicMotions --seg_len 5
python -u train_mul_ood.py --data CharacterTrajectories --seg_len 2
python -u train_mul_ood.py --data Cricket --seg_len 20
python -u train_mul_ood.py --data EigenWorms --seg_len 111
python -u train_mul_ood.py --data Epilepsy --seg_len 13
python -u train_mul_ood.py --data EthanolConcentration --seg_len 65
python -u train_mul_ood.py --data ERing --seg_len 6
python -u train_mul_ood.py --data FingerMovements --seg_len 5
python -u train_mul_ood.py --data HandMovementDirection --seg_len 20
python -u train_mul_ood.py --data Handwriting --seg_len 19
python -u train_mul_ood.py --data Heartbeat --seg_len 9
python -u train_mul_ood.py --data JapaneseVowels --seg_len 3
python -u train_mul_ood.py --data Libras --seg_len 9
python -u train_mul_ood.py --data LSST --seg_len 12
python -u train_mul_ood.py --data NATOPS --seg_len 2
python -u train_mul_ood.py --data PenDigits --seg_len 4
python -u train_mul_ood.py --data PhonemeSpectra --seg_len 22
python -u train_mul_ood.py --data RacketSports --seg_len 10
python -u train_mul_ood.py --data SelfRegulationSCP1 --seg_len 56
python -u train_mul_ood.py --data SelfRegulationSCP2 --seg_len 192
python -u train_mul_ood.py --data SpokenArabicDigits --seg_len 32
python -u train_mul_ood.py --data StandWalkJump --seg_len 10
python -u train_mul_ood.py --data UWaveGestureLibrary --seg_len 9
python -u train_mul_ood.py --data PEMS-SF --seg_len 12 --max_dim 36
python -u train_mul_ood.py --data MotorImagery --seg_len 50 --max_dim 32
python -u train_mul_ood.py --data FaceDetection --seg_len 7 --max_dim 36
python -u train_mul_ood.py --data DuckDuckGeese --seg_len 9
```
# construct_graph/inter_graph.py
import scipy.sparse as sp
import torch
import torch.nn as nn
from einops import rearrange
from torch_geometric.data import Data, Batch
from construct_graph.position_encoding import POSENCODINGS
import numpy as np


class inter_topk(nn.Module):
    def __init__(self, seg_len, pe_ratio, r, node_num, pe_method='lap'):
        super(inter_topk, self).__init__()
        self.seg_len = seg_len
        self.k = int(node_num * r)  # neighbors kept per segment node
        pe_dim = int(node_num * pe_ratio)  # positional-encoding dimension
        self.pe = POSENCODINGS[pe_method](pe_dim)

    def forward(self, x):
        batch, ts_dim, ts_len = x.shape
        # Truncate so the series splits evenly into segments.
        if ts_len % self.seg_len != 0:
            x = x[:, :, :ts_len - ts_len % self.seg_len]
        x_embed = rearrange(x, 'b d (seg_num seg_len) -> b d seg_num seg_len', seg_len=self.seg_len)
        # Pairwise cosine similarity between segments of the same channel.
        a = torch.einsum('bdij,bdjk->bdik', x_embed, x_embed.transpose(2, 3)) \
            / torch.einsum('b d i, b d j -> b d i j', x_embed.norm(dim=3), x_embed.norm(dim=3))
        # Keep only the top-k most similar segments as neighbors.
        mask = torch.zeros(batch, ts_dim, x_embed.shape[2], x_embed.shape[2]).to(x.device)
        s1, t1 = a.topk(self.k, dim=3)
        mask.scatter_(3, t1, s1.fill_(1))
        adjs = a * mask  # zero out non-neighbor similarities
        adjs = rearrange(adjs, 'b d i j -> d b i j')
        graphs = []
        for i, adj in enumerate(adjs):
            # ----- per channel (dimension) -----
            batch_graph = []
            for j in range(adj.shape[0]):
                # ----- per sample in the batch -----
                g = self.to_pygdata(adj[j], x_embed[j, i, :, :])
                abs_pe_list = self.pe.apply_to(g.cpu())
                g.x = torch.cat((g.x, abs_pe_list), dim=1).to(x.device)
                batch_graph.append(g)
            graphs.append(Batch.from_data_list(batch_graph).to(x.device))
        return graphs

    def to_pygdata(self, adj, feature):
        # Convert a dense adjacency matrix into a PyG edge list.
        adj = sp.coo_matrix(adj.cpu().detach().numpy())
        pyg_data = Data(x=feature,
                        edge_index=torch.tensor(np.array([adj.row, adj.col]), dtype=torch.long))
        return pyg_data
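As a shape sanity check, the following self-contained sketch (toy tensors with made-up sizes, not the module itself) reproduces the top-k cosine-similarity masking that `inter_topk.forward` applies before building the per-channel graphs:
```
import torch

x_embed = torch.randn(2, 3, 5, 4)  # (batch, dim, seg_num, seg_len)
# Cosine similarity between all segment pairs within each channel.
sim = torch.einsum('bdik,bdjk->bdij', x_embed, x_embed)
sim = sim / torch.einsum('bdi,bdj->bdij', x_embed.norm(dim=-1), x_embed.norm(dim=-1))
k = 2
mask = torch.zeros_like(sim)
_, idx = sim.topk(k, dim=-1)   # the k most similar segments per node
mask.scatter_(-1, idx, 1.0)
adj = sim * mask               # sparsified similarity adjacency
print(adj.shape)               # torch.Size([2, 3, 5, 5])
```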
# construct_graph/intra_graph.py
import numpy as np
import torch.nn as nn
from itertools import combinations
import torch
import scipy.sparse as sp
from torch_geometric.data import Data
from einops import rearrange
from construct_graph.position_encoding import POSENCODINGS


class intra_topk(nn.Module):
    def __init__(self, r, pe_ratio, dim_num, pe='lap', simi_ways='cos'):
        super(intra_topk, self).__init__()
        self.k = int(dim_num * r)  # neighbor channels kept per node
        pe_dim = int(pe_ratio * dim_num)
        self.pe = POSENCODINGS[pe](pe_dim)
        self.dim_num = dim_num
        self.simi_ways = simi_ways  # 'cos' or 'eu'

    def similarity_cosine(self, output):
        # output: [d, batch_size, n, h] -> similarity matrix [batch_size, d, d]
        inx = np.arange(len(output))
        similar_matrix = torch.zeros((output.shape[1], len(output), len(output)))
        similar_matrix += torch.eye(len(output))  # self-similarity is 1
        for a, b in combinations(inx, 2):
            similarity = torch.div(
                torch.einsum('bij,bij->b', output[a], output[b]),
                torch.flatten(output[a], start_dim=1).norm(dim=1)
                * torch.flatten(output[b], start_dim=1).norm(dim=1))
            similar_matrix[:, a, b] = similarity
            similar_matrix[:, b, a] = similarity  # symmetric
        return similar_matrix

    def euclidean_distance(self, output):
        # Negated pairwise distances, so topk selects the closest channels.
        output = rearrange(output, 'd b n h -> b d (n h)')
        a = -torch.cdist(output, output)
        mask = torch.zeros(a.shape[0], a.shape[1], a.shape[2]).to(output.device)
        s1, t1 = a.topk(self.k, dim=2)
        mask.scatter_(2, t1, s1.fill_(1))  # binary top-k neighbor mask
        return mask

    def forward(self, output1):
        device = output1.device
        if self.simi_ways == 'cos':
            adjs = self.similarity_cosine(output1)  # [batch_size, d, d]
        elif self.simi_ways == 'eu':
            adjs = -self.euclidean_distance(output1)
        # ---------- dimensional structure extractor --------------
        mask = torch.zeros((output1.shape[1], len(output1), len(output1)))
        s1, t1 = adjs.topk(self.k, dim=2)
        mask.scatter_(2, t1, s1.fill_(1))
        adjs = adjs * mask  # keep only the top-k channel neighbors
        graphs = []
        for i, adj in enumerate(adjs):
            edge_index = sp.coo_matrix(adj.cpu().detach().numpy())
            feature = rearrange(output1[:, i, :, :], 'd n f -> d (n f)')
            g = Data(x=feature,
                     edge_index=torch.tensor(np.array([edge_index.row, edge_index.col]),
                                             dtype=torch.long)).to(device)
            pe = self.pe.apply_to(g.cpu())
            g.x = torch.cat((g.x, pe), dim=1)
            graphs.append(g)
        return graphs
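For intuition, here is an equivalent vectorized sketch (toy tensors, hypothetical sizes) of the channel-level cosine-similarity matrix that `similarity_cosine` assembles pair by pair:
```
import torch

d, b, n, h = 3, 2, 4, 5
out = torch.randn(d, b, n, h)                        # [d, batch, n, h]
flat = out.permute(1, 0, 2, 3).reshape(b, d, n * h)  # flatten each channel
sim = torch.einsum('bdf,bef->bde', flat, flat)
sim = sim / (flat.norm(dim=-1).unsqueeze(-1) * flat.norm(dim=-1).unsqueeze(-2))
print(sim.shape)  # torch.Size([2, 3, 3]); the diagonal is all ones
```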
# construct_graph/position_encoding.py
import torch
import torch_geometric.utils as utils


class PositionEncoding(object):
    def apply_to(self, graph):
        pe = self.compute_pe(graph)
        return pe


class LapEncoding(PositionEncoding):
    def __init__(self, dim, normalization=None):
        """
        normalization: Laplacian normalization; one of None, 'sym', or 'rw'.
        """
        self.pos_enc_dim = dim
        self.normalization = normalization

    def compute_pe(self, graph):
        # Note: only the Laplacian's sparsity pattern is used here; the edge
        # weights returned by get_laplacian (index [1]) are replaced by ones.
        laplacian = utils.get_laplacian(
            graph.edge_index.long(), normalization=self.normalization,
            num_nodes=graph.num_nodes)[0]
        EigVal, EigVec = torch.linalg.eigh(
            torch.sparse_coo_tensor(laplacian, torch.ones(laplacian.shape[1])).to_dense().to(laplacian.device))
        # Eigenvectors are only defined up to sign, so flip signs at random.
        factor = torch.randn((1, EigVec.shape[0]))
        factor[factor >= 0] = 1
        factor[factor < 0] = -1
        EigVec *= factor
        return EigVec[:, 0:self.pos_enc_dim]


POSENCODINGS = {
    'lap': LapEncoding
}
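A quick sanity check on a hypothetical toy graph: applying `LapEncoding` to a 4-node cycle yields a `(num_nodes, pe_dim)` encoding matrix, which the graph builders then concatenate onto the node features:
```
import torch
from torch_geometric.data import Data

# Undirected 4-node cycle, both edge directions listed explicitly.
edge_index = torch.tensor([[0, 1, 1, 2, 2, 3, 3, 0],
                           [1, 0, 2, 1, 3, 2, 0, 3]], dtype=torch.long)
g = Data(edge_index=edge_index, num_nodes=4)
pe = LapEncoding(dim=2).apply_to(g)
print(pe.shape)  # torch.Size([4, 2])
```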
# data_load/data_loader.py
import numpy as np
from torch.utils.data import Dataset


class My_Multivariate_Data(Dataset):
    def __init__(self, x, y, merge_dim=0):
        # x: [samples, dims, length]; y: integer class labels
        self.data_x = x
        self.data_y = y
        self.classes = self.__classes__()
        self.merge_dim = merge_dim  # > 0 when channels were folded together

    def __getitem__(self, index):
        x = self.data_x[index, :, :]
        y = self.data_y[index]
        return x.astype(np.float32), y

    def __len__(self):
        return len(self.data_x)

    def __classes__(self):
        # Labels are assumed to be 0..C-1 after label encoding.
        return self.data_y.max() + 1
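A minimal usage sketch with synthetic arrays (shapes are made up for illustration):
```
import numpy as np
from torch.utils.data import DataLoader

x = np.random.randn(8, 3, 20)  # (samples, dims, length)
y = np.tile([0, 1], 4)         # integer labels for two classes
dataset = My_Multivariate_Data(x, y)
loader = DataLoader(dataset, batch_size=4, shuffle=True)
xb, yb = next(iter(loader))
print(xb.shape, yb.shape, dataset.classes)  # torch.Size([4, 3, 20]) torch.Size([4]) 2
```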
# data_load/datautils.py
import os
import numpy as np
import pandas as pd
from scipy.io.arff import loadarff
from sklearn.preprocessing import StandardScaler, LabelEncoder
from einops import rearrange
from data_load import data_loader
import natsort
from sklearn.model_selection import train_test_split
def load(dataset, max_dim):
    train_data = loadarff(f'datasets/UEA/{dataset}/{dataset}_TRAIN.arff')[0]
    test_data = loadarff(f'datasets/UEA/{dataset}/{dataset}_TEST.arff')[0]

    def extract_data(data):
        res_data = []
        res_labels = []
        for t_data, t_label in data:
            t_data = np.array([d.tolist() for d in t_data])
            t_label = t_label.decode("utf-8")
            res_data.append(t_data)
            res_labels.append(t_label)
        return np.array(res_data).swapaxes(1, 2), np.array(res_labels)

    train_X, train_y = extract_data(train_data)
    test_X, test_y = extract_data(test_data)
    train_X = train_X.transpose(0, 2, 1)
    test_X = test_X.transpose(0, 2, 1)
    # Zero-pad (or truncate) so each dataset's length divides evenly into
    # segments of its default segment length.
    if dataset == 'FaceDetection':
        # FaceDetection's length: 62 -> 63
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 1))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 1))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'JapaneseVowels':
        # JapaneseVowels' length: 29 -> 30
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 1))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 1))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'Cricket':
        # Cricket's length: 1197 -> 1200
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 3))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 3))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'EigenWorms':
        # EigenWorms' length: 17984 -> 17982
        train_X = train_X[:, :, :len(train_X[0][0]) - 2]
        test_X = test_X[:, :, :len(test_X[0][0]) - 2]
    elif dataset == 'Epilepsy':
        # Epilepsy's length: 206 -> 208
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 2))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 2))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'EthanolConcentration':
        # EthanolConcentration's length: 1751 -> 1755
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 4))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 4))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'NATOPS':
        # NATOPS' length: 51 -> 54
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 3))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 3))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'PhonemeSpectra':
        # PhonemeSpectra's length: 217 -> 220
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 3))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 3))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'SpokenArabicDigits':
        # SpokenArabicDigits' length: 93 -> 96
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 3))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 3))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'ERing':
        # ERing's length: 65 -> 66
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 1))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 1))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    # Standardize with statistics fitted on the training set only.
    scaler = StandardScaler()
    scaler.fit(train_X.reshape(-1, train_X.shape[-1]))
    train_X = scaler.transform(train_X.reshape(-1, train_X.shape[-1])).reshape(train_X.shape)
    test_X = scaler.transform(test_X.reshape(-1, test_X.shape[-1])).reshape(test_X.shape)
    le = LabelEncoder()
    le.fit(train_y)
    train_y = le.transform(train_y)
    test_y = le.transform(test_y)
    train_X, val_X, train_y, val_y = train_test_split(train_X, train_y, test_size=0.2, random_state=20,
                                                      stratify=train_y)
    train_dataset = data_loader.My_Multivariate_Data(train_X, train_y)
    val_dataset = data_loader.My_Multivariate_Data(val_X, val_y)
    test_dataset = data_loader.My_Multivariate_Data(test_X, test_y)
    return train_dataset, val_dataset, test_dataset
def load_UEA(dataset, max_dim):
    train_X = np.load(f'datasets/UEA/{dataset}/{dataset}_train_x.npy')
    train_y = np.load(f'datasets/UEA/{dataset}/{dataset}_train_label.npy')
    test_X = np.load(f'datasets/UEA/{dataset}/{dataset}_test_x.npy')
    test_y = np.load(f'datasets/UEA/{dataset}/{dataset}_test_label.npy')
    train_X = np.nan_to_num(train_X)
    test_X = np.nan_to_num(test_X)
    scaler = StandardScaler()
    scaler.fit(train_X.reshape(-1, train_X.shape[-1]))
    train_X = scaler.transform(train_X.reshape(-1, train_X.shape[-1])).reshape(train_X.shape)
    test_X = scaler.transform(test_X.reshape(-1, test_X.shape[-1])).reshape(test_X.shape)
    labels = np.unique(train_y)
    transform = {k: i for i, k in enumerate(labels)}
    train_y = np.vectorize(transform.get)(train_y)
    test_y = np.vectorize(transform.get)(test_y)
    if dataset == 'FaceDetection':
        # FaceDetection's length: 62 -> 63
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 1))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 1))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'JapaneseVowels':
        # JapaneseVowels' length: 29 -> 30
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 1))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 1))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    # Fold channels together when there are more than max_dim of them,
    # zero-padding so the channel count divides evenly.
    dim_num = train_X.shape[1]
    merge_dim = False
    if dim_num > max_dim:
        new_dim_num = int(dim_num / max_dim) + 1 if int(dim_num % max_dim != 0) else int(dim_num / max_dim)
        pad_dim = max_dim - dim_num % max_dim if int(dim_num % max_dim != 0) else 0
        if pad_dim != 0:
            zero_pad = np.zeros((train_X.shape[0], pad_dim, train_X.shape[2]))
            train_X = np.concatenate((train_X, zero_pad), axis=1)
            zero_pad = np.zeros((test_X.shape[0], pad_dim, test_X.shape[2]))
            test_X = np.concatenate((test_X, zero_pad), axis=1)
        train_X = rearrange(train_X, 'n (d1 d2) f -> n d1 (d2 f)', d1=max_dim)
        test_X = rearrange(test_X, 'n (d1 d2) f -> n d1 (d2 f)', d1=max_dim)
        merge_dim = True
    le = LabelEncoder()
    le.fit(train_y)
    train_y = le.transform(train_y)
    test_y = le.transform(test_y)
    train_X, val_X, train_y, val_y = train_test_split(train_X, train_y, test_size=0.2, random_state=20,
                                                      stratify=train_y)
    if not merge_dim:
        train_dataset = data_loader.My_Multivariate_Data(train_X, train_y)
        val_dataset = data_loader.My_Multivariate_Data(val_X, val_y)
        test_dataset = data_loader.My_Multivariate_Data(test_X, test_y)
    else:
        train_dataset = data_loader.My_Multivariate_Data(train_X, train_y, new_dim_num)
        val_dataset = data_loader.My_Multivariate_Data(val_X, val_y, new_dim_num)
        test_dataset = data_loader.My_Multivariate_Data(test_X, test_y, new_dim_num)
    return train_dataset, val_dataset, test_dataset
def load_UEA_csv(dataset, max_dim):
    path = f'datasets/UEA_csv/{dataset}'
    i = 0
    j = 0
    files = os.listdir(path)
    files = natsort.natsorted(files)
    # Stack the per-dimension csv files into (samples, dims, length) arrays.
    for c in files:
        if c.endswith('TRAIN.csv'):
            df = pd.read_csv(os.path.join(path, c), header=None)
            df = df.replace('?', np.nan).astype(np.float32)
            train = df.to_numpy()
            if i == 0:
                train_X = train[:, np.newaxis, :]
            else:
                train_X = np.concatenate((train_X, train[:, np.newaxis, :]), axis=1)
            i += 1
        elif c.endswith("TEST.csv"):
            df = pd.read_csv(os.path.join(path, c), header=None)
            df = df.replace('?', np.nan).astype(np.float32)
            test = df.to_numpy()
            if j == 0:
                test_X = test[:, np.newaxis, :]
            else:
                test_X = np.concatenate((test_X, test[:, np.newaxis, :]), axis=1)
            j += 1
        elif c.startswith('train'):
            train_y = pd.read_csv(os.path.join(path, c), header=None).to_numpy().squeeze(1)
        elif c.startswith('test'):
            test_y = pd.read_csv(os.path.join(path, c), header=None).to_numpy().squeeze(1)
    train_X = np.nan_to_num(train_X)
    test_X = np.nan_to_num(test_X)
    if dataset == 'Cricket':
        # Cricket's length: 1197 -> 1200
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 3))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 3))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'EigenWorms':
        # EigenWorms' length: 17984 -> 17982
        train_X = train_X[:, :, :len(train_X[0][0]) - 2]
        test_X = test_X[:, :, :len(test_X[0][0]) - 2]
    elif dataset == 'Epilepsy':
        # Epilepsy's length: 206 -> 208
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 2))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 2))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'EthanolConcentration':
        # EthanolConcentration's length: 1751 -> 1755
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 4))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 4))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'NATOPS':
        # NATOPS' length: 51 -> 54
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 3))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 3))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'PhonemeSpectra':
        # PhonemeSpectra's length: 217 -> 220
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 3))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 3))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'SpokenArabicDigits':
        # SpokenArabicDigits' length: 93 -> 96
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 3))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 3))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    elif dataset == 'ERing':
        # ERing's length: 65 -> 66
        zero_pad = np.zeros((train_X.shape[0], train_X.shape[1], 1))
        train_X = np.concatenate((train_X, zero_pad), axis=2)
        zero_pad = np.zeros((test_X.shape[0], test_X.shape[1], 1))
        test_X = np.concatenate((test_X, zero_pad), axis=2)
    dim_num = train_X.shape[1]
    merge_dim = False
    if dim_num > max_dim:
        new_dim_num = int(dim_num / max_dim) + 1 if int(dim_num % max_dim != 0) else int(dim_num / max_dim)
        # Guard against dim_num being an exact multiple of max_dim, in which
        # case no channel padding is needed (mirrors load_UEA above).
        pad_dim = max_dim - dim_num % max_dim if int(dim_num % max_dim != 0) else 0
        if pad_dim != 0:
            zero_pad = np.zeros((train_X.shape[0], pad_dim, train_X.shape[2]))
            train_X = np.concatenate((train_X, zero_pad), axis=1)
            zero_pad = np.zeros((test_X.shape[0], pad_dim, test_X.shape[2]))
            test_X = np.concatenate((test_X, zero_pad), axis=1)
        train_X = rearrange(train_X, 'n (d1 d2) f -> n d1 (d2 f)', d1=max_dim)
        test_X = rearrange(test_X, 'n (d1 d2) f -> n d1 (d2 f)', d1=max_dim)
        merge_dim = True
    le = LabelEncoder()
    le.fit(train_y)
    train_y = le.transform(train_y)
    test_y = le.transform(test_y)
    scaler = StandardScaler()
    scaler.fit(train_X.reshape(-1, train_X.shape[-1]))
    train_X = scaler.transform(train_X.reshape(-1, train_X.shape[-1])).reshape(train_X.shape)
    test_X = scaler.transform(test_X.reshape(-1, test_X.shape[-1])).reshape(test_X.shape)
    train_X, val_X, train_y, val_y = train_test_split(train_X, train_y, test_size=0.2, random_state=20,
                                                      stratify=train_y)
    if not merge_dim:
        train_dataset = data_loader.My_Multivariate_Data(train_X, train_y)
        val_dataset = data_loader.My_Multivariate_Data(val_X, val_y)
        test_dataset = data_loader.My_Multivariate_Data(test_X, test_y)
    else:
        train_dataset = data_loader.My_Multivariate_Data(train_X, train_y, new_dim_num)
        val_dataset = data_loader.My_Multivariate_Data(val_X, val_y, new_dim_num)
        test_dataset = data_loader.My_Multivariate_Data(test_X, test_y, new_dim_num)
    return train_dataset, val_dataset, test_dataset
# model/adver_network.py
import torch
import torch.nn as nn
from torch.autograd import Function


class ReverseLayerF(Function):
    """Gradient reversal layer: identity in the forward pass, gradient
    multiplied by -alpha in the backward pass."""

    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Reverse and scale the gradient for adversarial domain training.
        output = grad_output.neg() * ctx.alpha
        return output, None


class Discriminator(nn.Module):
    def __init__(self, input_dim=256, hidden_dim=256, num_domains=4):
        super(Discriminator, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_domains),
        ]
        self.layers = torch.nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)
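A tiny sanity check with made-up values: the gradient-reversal layer is the identity in the forward pass and negates and scales the gradient in the backward pass:
```
import torch

x = torch.ones(3, requires_grad=True)
y = ReverseLayerF.apply(x, 0.5)
y.sum().backward()
print(y)       # tensor([1., 1., 1.], ...)           -- identity forward
print(x.grad)  # tensor([-0.5000, -0.5000, -0.5000]) -- reversed, scaled gradient
```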
# model/main_model.py
from construct_graph import inter_graph, intra_graph
from torch_geometric.nn import global_max_pool
from torch_geometric.utils import k_hop_subgraph
from torch_geometric.data import Batch, Data
from torch_geometric.nn import GCNConv
from model.adver_network import ReverseLayerF, Discriminator
from einops import rearrange
import torch.nn as nn
import torch


class main_model_ood(nn.Module):
    def __init__(self, seg_len, d_model1, d_model2, r1, r2, k_hops, dim, node_num, pe_ratio,
                 num_class=2, num_layers=1, class_type='mlp'):
        super(main_model_ood, self).__init__()
        self.seg_len = seg_len
        self.k_hops = k_hops
        self.dim = dim
        self.graph_builder = inter_graph.inter_topk(seg_len, pe_ratio, r1, node_num)
        self.process_graph = intra_graph.intra_topk(r2, pe_ratio, dim)
        self.gnn_num_layers = num_layers
        # One stack of per-channel GCNs for the temporal (segment) graphs.
        self.time_gnns = nn.ModuleList([])
        for j in range(num_layers):
            self.time_gnn = nn.ModuleList([])
            for i in range(dim):
                if j == 0:
                    self.time_gnn.append(GCNConv(seg_len + int(node_num * pe_ratio), d_model1))
                else:
                    self.time_gnn.append(GCNConv(d_model1, d_model1))
            self.time_gnns.append(self.time_gnn)
        # GCNs over the k-hop subgraphs and the dimensional graph.
        self.subgraph_gnns = nn.ModuleList([])
        self.dim_gnns = nn.ModuleList([])
        for j in range(num_layers):
            if j == 0:
                self.subgraph_gnn = GCNConv(d_model1 * node_num + int(dim * pe_ratio), d_model2)
            else:
                self.subgraph_gnn = GCNConv(d_model2, d_model2)
            self.subgraph_gnns.append(self.subgraph_gnn)
            self.dim_gnns.append(GCNConv(d_model2, d_model2))
        if class_type == 'mlp':
            self.task_classifier = nn.Sequential(
                nn.Linear(d_model2, d_model2),
                nn.ReLU(True),
                nn.Linear(d_model2, num_class)
            )
        elif class_type == 'lstm':
            # Note: nn.LSTM returns (output, (h_n, c_n)), which nn.Sequential
            # would pass straight to nn.Linear; the default 'mlp' head is the
            # tested path.
            self.task_classifier = nn.Sequential(
                nn.LSTM(d_model2, d_model2),
                nn.Linear(d_model2, num_class)
            )
        else:
            raise ValueError("class_type must be 'mlp' or 'lstm'.")
        self.domain_classifier = Discriminator(d_model2, d_model2, dim)

    def forward(self, x):
        graphs = self.graph_builder(x)
        time_output = []
        for i, g in enumerate(graphs):
            for j in range(self.gnn_num_layers):
                output = self.time_gnns[j][i](g.x, g.edge_index)
                g.x = output
            time_output.append(g.x)
        batch_size = x.shape[0]
        time_output = rearrange(torch.stack(time_output).to(x.device), 'd (b n) f -> d b n f', b=batch_size)
        dim_graph = self.process_graph(time_output)
        dim_graph = Batch.from_data_list(dim_graph).to(x.device)
        dim_subgraphs = Batch.from_data_list(self.get_subgraph_data(self.k_hops, dim_graph)).to(x.device)
        for i in range(self.gnn_num_layers):
            subgraph_output = self.subgraph_gnns[i](dim_subgraphs.x, dim_subgraphs.edge_index)
            dim_subgraphs.x = subgraph_output
        # Pool each k-hop subgraph into a single node representation.
        sub_output = global_max_pool(dim_subgraphs.x, dim_subgraphs.batch)
        for i in range(self.gnn_num_layers):
            dim_output = self.dim_gnns[i](sub_output, dim_graph.edge_index)
            sub_output = dim_output
        dim_output = sub_output
        # The domain head sees gradient-reversed features (fixed strength 1.5).
        domain_output = self.domain_classifier(ReverseLayerF.apply(dim_output, 1.5))
        dim_output = rearrange(dim_output, '(b d) f -> b d f', b=batch_size)
        dim_output = global_max_pool(dim_output, batch=None)
        return self.task_classifier(dim_output), domain_output, dim_output

    def get_subgraph_data(self, k_hops, graph):
        subgraphs = []
        for node_idx in range(graph.num_nodes):
            sub_nodes, sub_edge_index, _, edge_mask = k_hop_subgraph(
                torch.tensor([node_idx], dtype=torch.long), k_hops, graph.edge_index,
                num_nodes=graph.num_nodes, relabel_nodes=True)
            g = Data(x=graph.x[sub_nodes], edge_index=sub_edge_index)
            subgraphs.append(g)
        return subgraphs
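For reference, a standalone toy example (hypothetical 4-node chain) of the per-node k-hop extraction that `get_subgraph_data` delegates to `torch_geometric.utils.k_hop_subgraph`:
```
import torch
from torch_geometric.utils import k_hop_subgraph

edge_index = torch.tensor([[0, 1, 2], [1, 2, 3]], dtype=torch.long)  # 0->1->2->3
sub_nodes, sub_edge_index, _, _ = k_hop_subgraph(
    torch.tensor([3]), 2, edge_index, num_nodes=4, relabel_nodes=True)
print(sub_nodes)       # tensor([1, 2, 3]) -- nodes reaching node 3 in <= 2 hops
print(sub_edge_index)  # tensor([[0, 1], [1, 2]]) -- relabeled local indices
```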
# preprocess.py
import pandas as pd
import os
import natsort
import numpy as np
from scipy.io.arff import loadarff


def extract_data(data):
    res_data = []
    res_labels = []
    for t_data, t_label in data:
        t_data = np.array([d.tolist() for d in t_data])
        t_label = t_label.decode("utf-8")
        res_data.append(t_data)
        res_labels.append(t_label)
    return np.array(res_data).swapaxes(1, 2), np.array(res_labels)


def arff_to_numpy(dataset):
    train_data = loadarff(f'datasets/Multivariate_arff/{dataset}/{dataset}_TRAIN.arff')[0]
    test_data = loadarff(f'datasets/Multivariate_arff/{dataset}/{dataset}_TEST.arff')[0]
    train_X, train_y = extract_data(train_data)
    test_X, test_y = extract_data(test_data)
    train_X = train_X.transpose(0, 2, 1)
    test_X = test_X.transpose(0, 2, 1)
    if not os.path.exists(f'datasets/UEA/{dataset}'):
        os.makedirs(f'datasets/UEA/{dataset}')
    np.save(f'datasets/UEA/{dataset}/{dataset}_train_x.npy', train_X)
    np.save(f'datasets/UEA/{dataset}/{dataset}_train_label.npy', train_y)
    np.save(f'datasets/UEA/{dataset}/{dataset}_test_x.npy', test_X)
    np.save(f'datasets/UEA/{dataset}/{dataset}_test_label.npy', test_y)


def arff_to_csv(data_set_dir, data_save_dir):
    data_set_list = os.listdir(data_set_dir)
    if not os.path.exists(f'datasets/UEA_csv'):
        os.makedirs(f'datasets/UEA_csv')
    for dataset_name in data_set_list:
        if dataset_name in ['DuckDuckGeese', 'FaceDetection', 'InsectWingbeat', 'JapaneseVowels']:
            # These datasets are saved as numpy arrays instead of csv files.
            arff_to_numpy(dataset_name)
        elif dataset_name in ['Descriptions', 'DataDimensions.csv']:
            continue
        else:
            dataset_name_path = data_set_dir + "/" + dataset_name
            if os.path.isdir(dataset_name_path):
                dataset_name_path_list = natsort.natsorted(os.listdir(dataset_name_path), alg=natsort.ns.PATH)
                train_label_tag = False
                test_label_tag = False
                for data_file in dataset_name_path_list:
                    data_format = data_file.split('.')[1]
                    data_name = data_file.split('.')[0]
                    if data_format == 'arff' and 'Dimension' in data_name:
                        train_or_test = data_name.split('_')[1].lower()
                        file_name = dataset_name_path + "/" + data_file
                        with open(file_name, encoding="utf-8") as f:
                            # Skip the arff header up to the @data marker.
                            header = []
                            for line in f:
                                if line.startswith("@attribute"):
                                    header.append(line.split()[1])
                                elif line.startswith("@data"):
                                    break
                            if os.path.getsize(file_name) > 0:
                                data_label = pd.read_csv(f, header=None)
                            else:
                                print("---empty file---" + data_file)
                                continue
                        label = data_label.iloc[:, -1]
                        data = data_label.iloc[:, :data_label.shape[1] - 1]
                        data_csv_dir = data_save_dir + "/" + dataset_name
                        if not os.path.exists(data_csv_dir):
                            os.mkdir(data_csv_dir)
                        file_name_data = data_save_dir + "/" + dataset_name + "/" + data_name
                        file_name_label = data_save_dir + "/" + dataset_name + "/" + train_or_test + "_label.csv"
                        if not train_label_tag and train_or_test == 'train':
                            label.to_csv(file_name_label, mode='w', index=False, header=None, encoding='utf-8')
                            train_label_tag = True
                        if not test_label_tag and train_or_test == 'test':
                            label.to_csv(file_name_label, mode='w', index=False, header=None, encoding='utf-8')
                            test_label_tag = True
                        data.to_csv(file_name_data + ".csv", mode='w', index=False, header=None, encoding='utf-8')
                print(dataset_name, 'done!')


arff_to_csv('datasets/Multivariate_arff', 'datasets/UEA_csv')
# requirements.txt
blessed==1.20.0
Bottleneck==1.3.5
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==2.1.1
cmake==3.25.0
contourpy==1.0.5
cycler==0.11.0
einops==0.6.1
filelock==3.9.0
fonttools==4.25.0
future==0.18.3
gpustat==1.1
idna==3.4
importlib-resources==5.2.0
Jinja2==3.1.2
joblib==1.3.1
kiwisolver==1.4.4
lit==15.0.7
MarkupSafe==2.1.2
matplotlib==3.7.1
mkl-fft==1.3.6
mkl-random==1.2.2
mkl-service==2.4.0
mpmath==1.2.1
munkres==1.1.4
natsort==8.4.0
networkx==3.0
numexpr==2.8.4
numpy==1.24.1
nvidia-ml-py==12.535.77
packaging==23.0
pandas==1.5.3
Pillow==9.3.0
pip==23.1.2
psutil==5.9.5
pycparser==2.21
pyparsing==3.0.9
python-dateutil==2.8.2
pytz==2022.7
PyYAML==6.0
requests==2.28.1
scikit-learn==1.3.0
scipy==1.10.1
seaborn==0.12.2
setuptools==67.8.0
six==1.16.0
sympy==1.11.1
threadpoolctl==3.2.0
torch==1.12.1
torch-geometric==2.3.1
torch-scatter==2.1.0
torchaudio==2.0.2+cu118
torchvision==0.15.2+cu118
tqdm==4.65.1
triton==2.0.0
typing_extensions==4.4.0
tzdata==2023.3
urllib3==1.26.13
wcwidth==0.2.6
wheel==0.38.4
zipp==3.11.0
# train_mul_ood.py
import argparse
import random
from model.main_model import main_model_ood
from utils.early_stopping import EarlyStopping
from data_load import datautils
import gc
import numpy as np
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import label_binarize
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch
import torch.nn.functional as F


def setup_seed(seed):
    # Fix every source of randomness for reproducibility.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Time Series to Graph for Classification')
    parser.add_argument('--data', type=str, default='PEMS-SF', help='name of dataset')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size')
    parser.add_argument('--max_epoch', type=int, default=200, help='max epoch')
    parser.add_argument('--lr', type=float, default=0.003, help='learning rate')
    parser.add_argument('--weight_decay', type=float, default=1e-5, help='weight decay')
    parser.add_argument('--r1', type=float, default=0.5,
                        help='the ratio of nodes to choose as neighbors in intra-correlation extraction')
    parser.add_argument('--r2', type=float, default=0.7,
                        help='the ratio of nodes to choose as neighbors in inter-correlation extraction')
    parser.add_argument('--seg_len', type=int, default=12, help='the length of each segment')
    parser.add_argument('--gpu', type=int, default=0,
                        help='the gpu no. used for training and inference (defaults to 0)')
    parser.add_argument('--d_model1', type=int, default=96,
                        help='the hidden dim of features after building the graph')
    parser.add_argument('--d_model2', type=int, default=32,
                        help='the dimension of features after the dimensional graph')
    parser.add_argument('--seed', type=int, default=20, help='random seed')
    parser.add_argument('--k_hops', type=int, default=3, help='the number of neighbor hops')
    parser.add_argument('--pe_ratio', type=float, default=0.8, help='the ratio of positional encoding dimension')
    parser.add_argument('--patience', type=int, default=15, help='patience to end training, default is 15')
    parser.add_argument('--max_dim', type=int, default=50, help='max dimension to avoid running out of memory')
    parser.add_argument('--alpha', type=float, default=1, help='the weight of the domain cross-entropy loss')
    parser.add_argument('--save_path', type=str, default='model_save/ood', help='the path to save the model')
    parser.add_argument('--result_path', type=str, default='result.txt', help='the path to save results')
    args = parser.parse_args()
    for k, v in sorted(vars(args).items()):
        print(k, '=', v, end='. ')
    setup_seed(args.seed)
    if torch.cuda.is_available():
        print('GPU available.')
        device = torch.device('cuda:{}'.format(args.gpu))
    else:
        device = 'cpu'
    # These datasets are stored as numpy arrays; the rest as csv files.
    if args.data in ['DuckDuckGeese', 'FaceDetection', 'InsectWingbeat', 'JapaneseVowels']:
        train_dataset, val_dataset, test_dataset = datautils.load_UEA(args.data, args.max_dim)
    else:
        train_dataset, val_dataset, test_dataset = datautils.load_UEA_csv(args.data, args.max_dim)
    train_data_num = train_dataset.data_x.shape[0]
    val_data_num = val_dataset.data_x.shape[0]
    test_data_num = test_dataset.data_x.shape[0]
    train_ts = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    val_ts = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
    test_ts = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
    # If channels were merged, each segment spans merge_dim original channels.
    if train_dataset.merge_dim != 0:
        args.seg_len = args.seg_len * train_dataset.merge_dim
    num_classes = int(train_dataset.classes)
    dim_num = int(train_dataset.data_x.shape[1])
    ts_len = int(train_dataset.data_x.shape[2])
    time_node_num = int(ts_len / args.seg_len)
    model = main_model_ood(args.seg_len, args.d_model1, args.d_model2, args.r1, args.r2, args.k_hops, dim_num,
                           time_node_num, args.pe_ratio, num_classes)
    loss_fn = torch.nn.CrossEntropyLoss()
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
    # Each channel (dimension) acts as a domain label for the discriminator.
    label = torch.arange(start=0, end=dim_num, step=1)
    early_stopping = EarlyStopping(args.save_path, patience=args.patience)
    for epoch in tqdm(range(args.max_epoch)):
        setup_seed(args.seed)
        model.train()
        task_correct = 0
        tr_loss_task = 0
        domain_correct = 0
        tr_loss_domain = 0
        for i, (x, task_label) in enumerate(train_ts):
            setup_seed(args.seed)
            x = x.to(device)
            task_label = task_label.to(device)
            task_outputs, domain_output, _ = model(x)
            # Every sample contributes one domain label per channel.
            domain_label = label.repeat(1, task_label.shape[0]).T.to(device).squeeze(1)
            task_loss = loss_fn(task_outputs, task_label.long())
            domain_loss = loss_fn(domain_output, domain_label.long())
            loss = task_loss + domain_loss * args.alpha
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            task_pre = torch.argmax(task_outputs, dim=1)
            domain_pre = torch.argmax(domain_output, dim=1)
            task_correct += ((task_pre == task_label.long()).sum().item())
            domain_correct += ((domain_pre == domain_label.long()).sum().item())
            tr_loss_task += task_loss.item()
            tr_loss_domain += domain_loss.item()
            gc.collect()
            torch.cuda.empty_cache()
        train_acc = task_correct / train_data_num
        # -------------------------------- val ---------------------------------------
        val_correct = 0
        val_loss = 0
        model.eval()
        for i, (x, val_label) in enumerate(val_ts):
            x = x.to(device)
            val_label = val_label.to(device)
            val_outputs, domain_output, _ = model(x)
            val_loss += loss_fn(val_outputs, val_label.long()).item()
            val_correct += ((torch.argmax(val_outputs, dim=1) == val_label.long()).sum().item())
        val_acc = val_correct / val_data_num
        early_stopping(val_loss / len(val_ts), model, args.data)
        if early_stopping.early_stop:
            print("Early stopping")
            break
    # ---------------------------------- test ------------------------------------------
    test_correct = 0
    model.load_state_dict(torch.load('{}/model_{}.pth'.format(args.save_path, args.data)))
    model.eval()
    with torch.no_grad():
        for i, (x, task_label) in enumerate(test_ts):
            x = x.to(device)
            task_label = task_label.to(device)
            task_outputs, domain_output, _ = model(x)
            task_pre = torch.argmax(task_outputs, dim=1)
            test_correct += ((task_pre == task_label.long()).sum().item())
            if i == 0:
                all_outputs = task_outputs
                all_label = task_label
            else:
                all_outputs = torch.cat((all_outputs, task_outputs), dim=0)
                all_label = torch.cat((all_label, task_label), dim=0)
    onehot_label = label_binarize(all_label.cpu().detach().numpy(), classes=np.arange(num_classes))
    if num_classes == 2:
        # For binary tasks, AUPRC uses the positive-class probability only.
        auprc = average_precision_score(onehot_label, F.softmax(all_outputs, dim=1)[:, 1].cpu().detach().numpy())
    else:
        auprc = average_precision_score(onehot_label, F.softmax(all_outputs, dim=1).cpu().detach().numpy())
    test_acc = test_correct / test_data_num
    with open(args.result_path, 'a') as f:
        print("dataset: {} seed: {} acc:{:.6f}, auprc:{:.6f}".format(args.data, args.seed, test_acc, auprc),
              file=f)
# utils/early_stopping.py
import numpy as np
import torch
import os


class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""

    def __init__(self, save_path, patience=7, verbose=False, delta=0):
        """
        Args:
            save_path : directory in which to save the model
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                           Default: 0
        """
        self.save_path = save_path
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss, model, data):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, data)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, data)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, data):
        '''Saves the model when the validation loss decreases.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
        path = os.path.join(self.save_path, 'model_{}.pth'.format(data))
        # Stores the parameters of the best model so far.
        torch.save(model.state_dict(), path)
        self.val_loss_min = val_loss
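A brief usage sketch with a stand-in model and made-up validation losses (the path and dataset name here are hypothetical):
```
import torch.nn as nn

model = nn.Linear(4, 2)                       # stand-in model
stopper = EarlyStopping('model_save/demo', patience=2)
for val_loss in [1.0, 0.8, 0.9, 0.95, 0.99]:  # fake validation losses
    stopper(val_loss, model, 'DemoDataset')   # checkpoints on improvement
    if stopper.early_stop:
        print('stopped early')
        break
```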