oucgc1996 committed
Commit 161bfc9
1 Parent(s): 781c0ca

Upload app.py

Files changed (1)
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
+ import numpy as np
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import pandas as pd
+ from transformers import set_seed
+ import torch
+ import torch.nn as nn
+ from collections import OrderedDict
+ import warnings
+ import random
+ import gradio as gr
+
+ warnings.filterwarnings('ignore')
+ set_seed(4)
+ device = "cpu"
+ model_checkpoint = "facebook/esm2_t12_35M_UR50D"
+ dropout = 0.1
+
+ def setup_seed(seed):  # make torch, numpy, and random reproducible
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     np.random.seed(seed)
+     random.seed(seed)
+     torch.backends.cudnn.deterministic = True
+ setup_seed(4)
+ # ESM-2 backbone whose 320-dim logits feed a small fully connected classification head.
+ class MyModel(nn.Module):
+     def __init__(self):
+         super().__init__()
+         self.bert = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=320)
+         self.bn1 = nn.BatchNorm1d(256)
+         self.bn2 = nn.BatchNorm1d(128)
+         self.bn3 = nn.BatchNorm1d(64)
+         self.relu = nn.ReLU()
+         self.fc1 = nn.Linear(320, 256)
+         self.fc2 = nn.Linear(256, 128)
+         self.fc3 = nn.Linear(128, 64)
+         self.output_layer = nn.Linear(64, 2)
+         self.dropout = nn.Dropout(dropout)
+
+     def forward(self, x):
+         with torch.no_grad():  # keep the pretrained backbone frozen
+             bert_output = self.bert(input_ids=x['input_ids'].to(device), attention_mask=x['attention_mask'].to(device))
+         output_feature = self.dropout(bert_output["logits"])
+         output_feature = self.dropout(self.relu(self.bn1(self.fc1(output_feature))))
+         output_feature = self.dropout(self.relu(self.bn2(self.fc2(output_feature))))
+         output_feature = self.dropout(self.relu(self.bn3(self.fc3(output_feature))))
+         output_feature = self.dropout(self.output_layer(output_feature))
+         return torch.softmax(output_feature, dim=1)
+
+ model = MyModel()
+ model.load_state_dict(torch.load("best_model.pth", map_location=device))
+ model = model.to(device)
+ model.eval()
+
+ tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+ # Classify a single sequence and return the predicted label with its probability.
+ def pre(file):
+     test_sequences = file
+     max_len = 30
+     test_data = tokenizer(test_sequences, max_length=max_len, padding="max_length", truncation=True, return_tensors='pt')
+     out_probability = []
+     with torch.no_grad():
+         predict = model(test_data)
+         out_probability.extend(np.max(np.array(predict.cpu()), axis=1).tolist())
+         test_argmax = np.argmax(predict.cpu(), axis=1).tolist()
+     id2str = {0: "non-nAChRs", 1: "nAChRs"}
+     return id2str[test_argmax[0]], out_probability[0]
+ # Predict every sequence in the uploaded file and write the results to output.csv.
+ def conotoxinfinder(files):
+     fr = open(getattr(files, "name", files), 'r')  # accept a file path or a Gradio file object
+     seqs = []
+     for line in fr:
+         if line.strip() and not line.startswith('>'):  # skip FASTA header lines (starting with '>') and blank lines
+             seqs.append(line.strip())
+     seq_all = []
+     output_all = []
+     probability_all = []
+     for seq in seqs:
+         output, probability = pre(str(seq))
+         seq_all.append(seq)
+         output_all.append(output)
+         probability_all.append(probability)
+     summary = OrderedDict()
+     summary['Seq'] = seq_all
+     summary['Class'] = output_all
+     summary['Probability'] = probability_all
+     summary_df = pd.DataFrame(summary)
+     summary_df.to_csv('output.csv', index=False)
+     return 'output.csv'
+
+ with open("conotoxinfinder.md", "r") as f:
+     description = f.read()
+ iface = gr.Interface(fn=conotoxinfinder,
+                      title="ConotoxinFinder nAChRs",
+                      inputs=[gr.File(label="Upload a file (.txt, .fasta) containing sequences")
+                              ],
+                      outputs="file",
+                      description=description
+                      )
+ iface.launch()
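
For reference, conotoxinfinder treats every non-header line of the uploaded file as one sequence. A minimal sketch of a valid input is below; the file name example.fasta and the amino-acid strings are placeholders introduced here for illustration, not part of this commit.

    # Sketch of an input file for conotoxinfinder(); placeholder sequences, hypothetical file name.
    example = """>seq1
    GCCSDPRCAWRC
    >seq2
    GCCSDPRCNYDHPEIC
    """
    with open("example.fasta", "w") as handle:
        handle.write(example)
    # With app.py's definitions loaded, conotoxinfinder("example.fasta") would classify both
    # sequences and write Seq/Class/Probability rows to output.csv.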