51 lines
1.4 KiB
Python
Executable File
51 lines
1.4 KiB
Python
Executable File
import json
|
|
import io
|
|
import base64
|
|
import math
|
|
from PIL import Image
|
|
import requests
|
|
from dots_ocr.utils.image_utils import PILimage_to_base64
|
|
from openai import OpenAI
|
|
import os
|
|
|
|
|
|
def inference_with_vllm(
    image,
    prompt: str,
    ip: str = "localhost",
    port: int = 8000,
    temperature: float = 0.1,
    top_p: float = 0.9,
    max_completion_tokens: int = 32768,
    model_name: str = 'model',
):
    """Send one image + text prompt to an OpenAI-compatible chat endpoint.

    The endpoint is addressed as ``http://{ip}:{port}/v1``. The API key is
    read from the ``API_KEY`` environment variable and defaults to ``"0"``.

    Args:
        image: Image handed to ``PILimage_to_base64`` to build the
            ``image_url`` payload (presumably a PIL image -- confirm against
            that helper).
        prompt: Text appended after the image placeholder tokens.
        ip: Host of the server.
        port: Port of the server.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.
        max_completion_tokens: Completion-length cap forwarded to the API.
        model_name: Model identifier forwarded to the API.

    Returns:
        The ``message.content`` of the first returned choice, or ``None``
        when the request fails.
    """
    # The OpenAI client reports transport/API failures as OpenAIError
    # subclasses rather than `requests` exceptions, so it must be caught
    # explicitly for the "return None on request error" path to work.
    from openai import OpenAIError

    base_url = f"http://{ip}:{port}/v1"
    client = OpenAI(api_key=os.environ.get("API_KEY", "0"), base_url=base_url)

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": PILimage_to_base64(image)},
                },
                # Without the explicit "<|img|><|imgpad|><|endofimg|>" prefix,
                # vllm v1 will add "\n" here.
                {"type": "text", "text": f"<|img|><|imgpad|><|endofimg|>{prompt}"},
            ],
        }
    ]

    try:
        completion = client.chat.completions.create(
            messages=messages,
            model=model_name,
            max_completion_tokens=max_completion_tokens,
            temperature=temperature,
            top_p=top_p,
        )
    except (requests.exceptions.RequestException, OpenAIError) as e:
        print(f"request error: {e}")
        return None

    return completion.choices[0].message.content
|
|
|