oucgc1996 committed on
Commit
1afa9ba
1 Parent(s): 8799be2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -34
app.py CHANGED
@@ -51,7 +51,7 @@ class MyDataset(Dataset):
51
  def __init__(self,dict_data) -> None:
52
  super(MyDataset,self).__init__()
53
  self.data=dict_data
54
- self.structure=pdb_structure(dict_data['text'])
55
  def __getitem__(self, index):
56
  return self.data['text'][index], self.structure[index]
57
  def __len__(self):
@@ -174,51 +174,57 @@ class MyModel(nn.Module):
174
  output_feature = self.dropout(self.relu(self.bn2(self.fc2(output_feature))))
175
  output_feature = self.dropout(self.relu(self.bn3(self.fc3(output_feature))))
176
  output_feature = self.dropout(self.output_layer(output_feature))
177
- print(output_feature)
178
  return torch.softmax(output_feature,dim=1)
179
 
180
 
181
- def pdb_structure(test_sequences):
182
- # structure_folder = join(temp_path, str(index)+".pdb")
183
- # created_folders.append(structure_folder)
184
- # pdb_file = join(pdb_path, f"{index}.pdb")
185
- # if os.path.exists(pdb_file):
186
- # shutil.copy2(pdb_file, structure_folder)
187
- # else:
188
- # print(f"PDB file not found for structure {index}")
189
- print("=================================Structure prediction========================")
190
- command = ["curl", "-X", "POST", "-k", "--data", f"{test_sequences}", "https://api.esmatlas.com/foldSequence/v1/pdb/"]
191
- result = subprocess.run(command, capture_output=True, text=True)
192
- # with open(os.path.join(pdb_path, f'{test_Structure_index[i]}.pdb'), 'w') as file:
193
- # file.write(result.stdout)
194
- coords, atname, pdbname, pdb_num = utils.parsePDB(result.stdout)
195
- atoms_channel = utils.atomlistToChannels(atname)
196
- radius = utils.atomlistToRadius(atname)
197
- PointCloudSurfaceObject = VolumeMaker.PointCloudSurface(device=device)
198
- coords = coords.to(device)
199
- radius = radius.to(device)
200
- atoms_channel = atoms_channel.to(device)
201
- SurfacePoitCloud = PointCloudSurfaceObject(coords, radius)
202
- feature = SurfacePoitCloud.view(pdb_num,-1,3).cpu()
203
- SurfacePoitCloud_all_tensor = torch.squeeze(torch.stack(feature),dim=1)
 
 
204
  return SurfacePoitCloud_all_tensor
205
 
206
  def ACE(file):
 
 
 
 
 
207
  # df = pd.read_csv(seq_path)
208
  # test_sequences = df["Seq"].tolist()
209
  # test_Structure_index = df["Structure_index"].tolist()
210
 
211
  test_sequences = [file]
212
- test_Structure_index = ["structure_1"]
213
 
214
 
215
  test_dict = {"text":test_sequences, 'structure':test_Structure_index}
216
- # print("=================================Structure prediction========================")
217
- # for i in tqdm(range(0, len(test_sequences))):
218
- # command = ["curl", "-X", "POST", "-k", "--data", f"{test_sequences[i]}", "https://api.esmatlas.com/foldSequence/v1/pdb/"]
219
- # result = subprocess.run(command, capture_output=True, text=True)
220
- # with open(os.path.join(pdb_path, f'{test_Structure_index[i]}.pdb'), 'w') as file:
221
- # file.write(result.stdout)
222
  test_data=MyDataset(test_dict)
223
  test_dataloader=DataLoader(test_data,batch_size=batch_size,collate_fn=collate_fn,shuffle=False)
224
 
@@ -235,6 +241,7 @@ def ACE(file):
235
  print("=================================Start prediction========================")
236
  for index, (batch, structure_fea, fingerprint) in enumerate(test_dataloader):
237
  batchs = {k: v for k, v in batch.items()}
 
238
  outputs = model(structure_fea, batchs, fingerprint)
239
  probability = outputs[0].tolist()
240
  print(outputs)
@@ -257,8 +264,12 @@ def ACE(file):
257
  summary['Probability'] = probability_all
258
  summary_df = pd.DataFrame(summary)
259
  summary_df.to_csv('output.csv', index=False)
260
- out_text = output
261
- out_prob = probability
 
 
 
 
262
  return 'output.csv', out_text, out_prob
263
 
264
  iface = gr.Interface(fn=ACE,
 
51
  def __init__(self,dict_data) -> None:
52
  super(MyDataset,self).__init__()
53
  self.data=dict_data
54
+ self.structure=pdb_structure(dict_data['structure'])
55
  def __getitem__(self, index):
56
  return self.data['text'][index], self.structure[index]
57
  def __len__(self):
 
174
  output_feature = self.dropout(self.relu(self.bn2(self.fc2(output_feature))))
175
  output_feature = self.dropout(self.relu(self.bn3(self.fc3(output_feature))))
176
  output_feature = self.dropout(self.output_layer(output_feature))
 
177
  return torch.softmax(output_feature,dim=1)
178
 
179
 
180
def pdb_structure(Structure_index):
    """Build surface point-cloud features for each structure identifier.

    For every identifier, ``<pdb_path>/<identifier>.pdb`` is copied into a
    scratch folder ``<temp_path>/<identifier>``. That folder is handed to
    ``utils.parsePDB``; the returned coordinates and the radii derived from
    the atom names are passed through ``VolumeMaker.PointCloudSurface``, and
    the result is reshaped to ``(pdb_num, -1, 3)`` and moved to the CPU.

    Args:
        Structure_index: Iterable of structure identifiers (PDB file stems).

    Returns:
        The per-structure features combined with ``torch.stack`` and squeezed
        along dimension 1.

    Note:
        Scratch folders are removed in a ``finally`` block so they are not
        left behind when parsing or surface generation raises.
    """
    created_folders = []
    surface_features = []
    try:
        for index in Structure_index:
            structure_folder = join(temp_path, str(index))
            os.makedirs(structure_folder, exist_ok=True)
            created_folders.append(structure_folder)

            pdb_file = join(pdb_path, f"{index}.pdb")
            if os.path.exists(pdb_file):
                shutil.copy2(pdb_file, structure_folder)
            else:
                # NOTE(review): processing continues with an empty folder; how
                # utils.parsePDB reacts to that is not visible here -- confirm.
                print(f"PDB file not found for structure {index}")

            coords, atname, _pdbname, pdb_num = utils.parsePDB(structure_folder)
            # NOTE(review): atoms_channel is computed but never consumed below;
            # kept in case the helper validates atom names -- confirm and drop.
            atoms_channel = utils.atomlistToChannels(atname)
            radius = utils.atomlistToRadius(atname)
            surface_builder = VolumeMaker.PointCloudSurface(device=device)
            coords = coords.to(device)
            radius = radius.to(device)
            atoms_channel = atoms_channel.to(device)
            surface_point_cloud = surface_builder(coords, radius)
            surface_features.append(surface_point_cloud.view(pdb_num, -1, 3).cpu())

        return torch.squeeze(torch.stack(surface_features), dim=1)
    finally:
        # Always clean up, including on failure; a cleanup error must not
        # mask the original exception or discard the computed features.
        for folder in created_folders:
            shutil.rmtree(folder, ignore_errors=True)
206
 
207
  def ACE(file):
208
+ if not os.path.exists(pdb_path):
209
+ os.makedirs(pdb_path)
210
+ else:
211
+ shutil.rmtree(pdb_path)
212
+ os.makedirs(pdb_path)
213
  # df = pd.read_csv(seq_path)
214
  # test_sequences = df["Seq"].tolist()
215
  # test_Structure_index = df["Structure_index"].tolist()
216
 
217
  test_sequences = [file]
218
+ test_Structure_index = [f"structure_{i}" for i in range(len(test_sequences))]
219
 
220
 
221
  test_dict = {"text":test_sequences, 'structure':test_Structure_index}
222
+ print("=================================Structure prediction========================")
223
+ for i in tqdm(range(0, len(test_sequences))):
224
+ command = ["curl", "-X", "POST", "-k", "--data", f"{test_sequences[i]}", "https://api.esmatlas.com/foldSequence/v1/pdb/"]
225
+ result = subprocess.run(command, capture_output=True, text=True)
226
+ with open(os.path.join(pdb_path, f'{test_Structure_index[i]}.pdb'), 'w') as file:
227
+ file.write(result.stdout)
228
  test_data=MyDataset(test_dict)
229
  test_dataloader=DataLoader(test_data,batch_size=batch_size,collate_fn=collate_fn,shuffle=False)
230
 
 
241
  print("=================================Start prediction========================")
242
  for index, (batch, structure_fea, fingerprint) in enumerate(test_dataloader):
243
  batchs = {k: v for k, v in batch.items()}
244
+ print(structure_fea)
245
  outputs = model(structure_fea, batchs, fingerprint)
246
  probability = outputs[0].tolist()
247
  print(outputs)
 
264
  summary['Probability'] = probability_all
265
  summary_df = pd.DataFrame(summary)
266
  summary_df.to_csv('output.csv', index=False)
267
+ if len(test_sequences) > 1:
268
+ out_text = "Please download csv"
269
+ out_prob = "Please download csv"
270
+ else:
271
+ out_text = output
272
+ out_prob = probability
273
  return 'output.csv', out_text, out_prob
274
 
275
  iface = gr.Interface(fn=ACE,