Imran1 FINGU-AI committed on
Commit a6861fa
1 Parent(s): df9d248

Update inference.py (#7)


- Update inference.py (c08633e8693e1a0b1b3b853b01a6df653192d516)


Co-authored-by: GRINDA AI <[email protected]>

Files changed (1)
  1. inference.py +9 -2
inference.py CHANGED
@@ -5,12 +5,19 @@ import sys
 import torch
 from typing import List, Dict
 
-# Ensure vllm is installed
+# Ensure vllm is installed and specify version to match CUDA compatibility
 try:
     import vllm
 except ImportError:
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "vllm"])
+    # Check CUDA version and install the correct vllm version
+    cuda_version = torch.version.cuda
+    if cuda_version == "11.8":
+        vllm_version = "v0.6.1.post1"
+        pip_cmd = f"pip install https://github.com/vllm-project/vllm/releases/download/{vllm_version}/vllm-{vllm_version}+cu118-cp310-cp310-manylinux1_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118"
+    else:
+        raise RuntimeError(f"Unsupported CUDA version: {cuda_version}")
 
+    subprocess.check_call([sys.executable, "-m", "pip", "install", pip_cmd])
 # Import the necessary modules after installation
 from vllm import LLM, SamplingParams
 from vllm.utils import random_uuid
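
Note that the added branch builds a complete "pip install ..." command as one string and then passes that whole string as a single argument to python -m pip install, so pip would see it as one requirement rather than a wheel URL plus an extra index. Below is a minimal sketch of the same install-on-import pattern with each option passed as a separate argument; the wheel URL, version pin, and CUDA check are taken from the diff, while the function name and the early-return structure are illustrative assumptions, not the committed code.

import subprocess
import sys

import torch


def ensure_vllm_installed() -> None:
    """Install a CUDA-matched vllm wheel if vllm is not already importable.

    Sketch only: the v0.6.1.post1 / cu118 pairing and wheel URL mirror the
    diff above; everything else here is an assumption.
    """
    try:
        import vllm  # noqa: F401
        return
    except ImportError:
        pass

    cuda_version = torch.version.cuda
    if cuda_version != "11.8":
        raise RuntimeError(f"Unsupported CUDA version: {cuda_version}")

    vllm_version = "v0.6.1.post1"
    wheel_url = (
        "https://github.com/vllm-project/vllm/releases/download/"
        f"{vllm_version}/vllm-{vllm_version}+cu118-cp310-cp310-manylinux1_x86_64.whl"
    )
    # Pass the wheel URL and the extra index as separate argv entries so pip
    # parses them as distinct arguments instead of one long requirement string.
    subprocess.check_call([
        sys.executable, "-m", "pip", "install", wheel_url,
        "--extra-index-url", "https://download.pytorch.org/whl/cu118",
    ])

With the check done this way, the subsequent imports (from vllm import LLM, SamplingParams and from vllm.utils import random_uuid) proceed as in the file once installation succeeds.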