Spaces:

sagawa
/

PLTNUM

App Files Community

sagawa committed on Aug 28

Commit

a0912bb

•

1 Parent(s): 7cee862

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -30

app.py CHANGED Viewed

@@ -28,50 +28,65 @@ class Config:
         self.seed = 42
 def predict_stability_with_pdb(model_choice, organism_choice, pdb_files, cfg=Config()):
-    results = {"file_name": [],
-               "raw prediction value": [],
-               "binary prediction value": []
-               }
     file_names = []
     input_sequences = []
     for pdb_file in pdb_files:
         pdb_path = pdb_file.name
-        os.system("chmod 777 bin/foldseek")
         sequences = get_foldseek_seq(pdb_path)
         if not sequences:
-            results["file_name"].append(pdb_file.name.split("/")[-1])
             results["raw prediction value"].append(None)
             results["binary prediction value"].append(None)
             continue
         sequence = sequences[2] if model_choice == "SaProt" else sequences[0]
-        file_names.append(pdb_file.name.split("/")[-1])
         input_sequences.append(sequence)
-    raw_prediction, binary_prediction = predict_stability_core(model_choice, organism_choice, input_sequences, cfg)
-    results["file_name"] = results["file_name"] + file_names
-    results["raw prediction value"] = results["raw prediction value"] + raw_prediction
-    results["binary prediction value"] = results["binary prediction value"] + binary_prediction
     df = pd.DataFrame(results)
     output_csv = "/tmp/predictions.csv"
     df.to_csv(output_csv, index=False)
     return output_csv
-def predict_stability_with_sequence(model_choice, organism_choice, sequence, cfg=Config()):
     try:
-        if not sequence:
-            return "No valid sequence provided."
-        raw_prediction, binary_prediction = predict_stability_core(model_choice, organism_choice, [sequence], cfg)
-        df = pd.DataFrame({"sequence": sequence, "raw prediction value": raw_prediction, "binary prediction value": binary_prediction})
         output_csv = "/tmp/predictions.csv"
         df.to_csv(output_csv, index=False)
-        return output_csv
     except Exception as e:
         return f"An error occurred: {str(e)}"
@@ -110,6 +125,7 @@ def predict(cfg, sequences):
         cfg.model_path, padding_side=cfg.padding_side
     )
     cfg.tokenizer = tokenizer
     dataset = PLTNUMDataset(cfg, df, train=False)
     dataloader = DataLoader(
         dataset,
@@ -126,19 +142,19 @@ def predict(cfg, sequences):
     model.eval()
     predictions = []
-    for inputs, _ in dataloader:
-        inputs = inputs.to(cfg.device)
-        with torch.no_grad():
             with torch.amp.autocast(cfg.device, enabled=cfg.use_amp):
                 preds = (
                     torch.sigmoid(model(inputs))
                     if cfg.task == "classification"
                     else model(inputs)
                 )
-        predictions += preds.cpu().tolist()
     predictions = list(itertools.chain.from_iterable(predictions))
     return predictions, [1 if x > 0.5 else 0 for x in predictions]
@@ -174,9 +190,7 @@ with gr.Blocks() as demo:
             gr.Markdown("### Upload your PDB files:")
             pdb_files = gr.File(label="Upload PDB Files", file_count="multiple")
             predict_button = gr.Button("Predict Stability")
-            prediction_output = gr.File(
-                label="Download Predictions"
-            )
             predict_button.click(
                 fn=predict_stability_with_pdb,
@@ -192,9 +206,7 @@ with gr.Blocks() as demo:
                 lines=8,
             )
             predict_button = gr.Button("Predict Stability")
-            prediction_output = gr.File(
-                label="Download Predictions"
-            )
             predict_button.click(
                 fn=predict_stability_with_sequence,

         self.seed = 42
 def predict_stability_with_pdb(model_choice, organism_choice, pdb_files, cfg=Config()):
+    """Predict stability for uploaded PDB files and write the results to a CSV.
+
+    Each file's path (``pdb_file.name``) is passed to ``get_foldseek_seq``.
+    Files for which it returns nothing are recorded with ``None`` in both
+    prediction columns; the sequences of the remaining files are sent to
+    ``predict_stability_core`` in a single call.
+
+    Args:
+        model_choice: Model identifier; ``"SaProt"`` selects a different entry
+            of the ``get_foldseek_seq`` result than any other value.
+        organism_choice: Forwarded unchanged to ``predict_stability_core``.
+        pdb_files: Iterable of objects exposing a ``.name`` file path.
+        cfg: Configuration forwarded to ``predict_stability_core``. The
+            default ``Config()`` is created once, when the function is
+            defined, and is therefore shared by all calls that omit ``cfg``.
+
+    Returns:
+        The path of the written CSV, always ``"/tmp/predictions.csv"`` (each
+        call overwrites the previous file).
+    """
+    results = {
+        "file_name": [],
+        "raw prediction value": [],
+        "binary prediction value": [],
+    }
     file_names = []
     input_sequences = []
+    # Set the foldseek binary's permissions once, before the loop.
+    # Presumably get_foldseek_seq executes this binary -- TODO confirm.
+    os.system("chmod 777 bin/foldseek")
     for pdb_file in pdb_files:
         pdb_path = pdb_file.name
         sequences = get_foldseek_seq(pdb_path)
+        file_name = os.path.basename(pdb_path)
         if not sequences:
+            # No sequence extracted: emit a placeholder row right away.
+            results["file_name"].append(file_name)
             results["raw prediction value"].append(None)
             results["binary prediction value"].append(None)
             continue
         sequence = sequences[2] if model_choice == "SaProt" else sequences[0]
+        file_names.append(file_name)
         input_sequences.append(sequence)
+    raw_pred, binary_pred = predict_stability_core(
+        model_choice, organism_choice, input_sequences, cfg
+    )
+    # Predicted rows are appended after the placeholder rows, so the CSV lists
+    # failed files first rather than following the upload order.
+    results["file_name"].extend(file_names)
+    results["raw prediction value"].extend(raw_pred)
+    results["binary prediction value"].extend(binary_pred)
     df = pd.DataFrame(results)
     output_csv = "/tmp/predictions.csv"
     df.to_csv(output_csv, index=False)
     return output_csv
+def predict_stability_with_sequence(
+    model_choice, organism_choice, sequence, cfg=Config()
+):
+    """Predict stability for one sequence and write the result to a CSV.
+
+    Args:
+        model_choice: Forwarded unchanged to ``predict_stability_core``.
+        organism_choice: Forwarded unchanged to ``predict_stability_core``.
+        sequence: The input sequence; a falsy value (e.g. an empty string)
+            short-circuits with a message.
+        cfg: Configuration forwarded to ``predict_stability_core``. The
+            default ``Config()`` is created once, when the function is
+            defined, and is shared by all calls that omit ``cfg``.
+
+    Returns:
+        ``"/tmp/predictions.csv"`` on success. On empty input or on any
+        exception, a human-readable message string is returned instead of a
+        path.
+    """
+    # NOTE(review): the two message returns below are plain strings, not file
+    # paths -- confirm the UI output wired to this function accepts them.
+    if not sequence:
+        return "No valid sequence provided."
     try:
+        raw_pred, binary_pred = predict_stability_core(
+            model_choice, organism_choice, [sequence], cfg
+        )
+        df = pd.DataFrame(
+            {
+                "sequence": sequence,
+                "raw prediction value": raw_pred,
+                "binary prediction value": binary_pred,
+            }
+        )
         output_csv = "/tmp/predictions.csv"
         df.to_csv(output_csv, index=False)
+        return output_csv
     except Exception as e:
         return f"An error occurred: {str(e)}"
         cfg.model_path, padding_side=cfg.padding_side
     )
     cfg.tokenizer = tokenizer
     dataset = PLTNUMDataset(cfg, df, train=False)
     dataloader = DataLoader(
         dataset,
     model.eval()
     predictions = []
+    with torch.no_grad():
+        # Inference only: gradients are disabled once for the whole loop.
+        for inputs, _ in dataloader:
+            inputs = inputs.to(cfg.device)
             with torch.amp.autocast(cfg.device, enabled=cfg.use_amp):
                 preds = (
                     torch.sigmoid(model(inputs))
                     if cfg.task == "classification"
                     else model(inputs)
                 )
+            # Must stay inside the for-loop: at the loop's own indentation it
+            # would run once after the loop, keeping only the final batch and
+            # raising NameError on `preds` when the dataloader is empty.
+            predictions.extend(preds.cpu().tolist())
     predictions = list(itertools.chain.from_iterable(predictions))
     return predictions, [1 if x > 0.5 else 0 for x in predictions]
             gr.Markdown("### Upload your PDB files:")
             pdb_files = gr.File(label="Upload PDB Files", file_count="multiple")
             predict_button = gr.Button("Predict Stability")
+            prediction_output = gr.File(label="Download Predictions")
             predict_button.click(
                 fn=predict_stability_with_pdb,
                 lines=8,
             )
             predict_button = gr.Button("Predict Stability")
+            prediction_output = gr.File(label="Download Predictions")
             predict_button.click(
                 fn=predict_stability_with_sequence,