Spaces:

Harshithtd
/

MultiModalModel

Runtime error

App Files Files Community

Harshithtd committed on Sep 12, 2024

Commit

97429f0

verified ·

1 Parent(s): 7a3b1ec

Create app.py

Browse files

Files changed (1) hide show

app.py +104 -0

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import os
+from vllm import LLM, SamplingParams
+import gradio as gr
+from PIL import Image
+from io import BytesIO
+import base64
+import requests
+from huggingface_hub import login
+import os
+login(os.environ["HF_TOKEN"])
+repo_id = "mistral-community/pixtral-12b-240910" #Replace to the model you would like to use
+sampling_params = SamplingParams(max_tokens=8192, temperature=0.7)
+max_tokens_per_img = 4096
+max_img_per_msg = 5
+llm = LLM(model="mistralai/Pixtral-12B-2409",
+          tokenizer_mode="mistral",
+          max_model_len=65536,
+          max_num_batched_tokens=max_img_per_msg * max_tokens_per_img,
+          limit_mm_per_prompt={"image": max_img_per_msg})  # Name or path of your model
+def encode_image(image: Image.Image, image_format="PNG") -> str:
+    im_file = BytesIO()
+    image.save(im_file, format=image_format)
+    im_bytes = im_file.getvalue()
+    im_64 = base64.b64encode(im_bytes).decode("utf-8")
+    return im_64
+# @spaces.GPU #[uncomment to use ZeroGPU]
+def infer(image_url, prompt, progress=gr.Progress(track_tqdm=True)):
+    image = Image.open(BytesIO(requests.get(image_url).content))
+    image = image.resize((3844, 2408))
+    new_image_url = f"data:image/png;base64,{encode_image(image, image_format='PNG')}"
+    messages = [
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": new_image_url}}]
+        },
+    ]
+    outputs = llm.chat(messages, sampling_params=sampling_params)
+    return outputs[0].outputs[0].text
+examples = [["https://picsum.photos/id/237/200/300", "What do you see in this image?"]]
+css = """
+#col-container {
+    margin: 0 auto;
+    max-width: 640px;
+}
+"""
+with gr.Blocks(css=css) as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown(f"""
+        # Mistral Pixtral 12B
+        """)
+        with gr.Row():
+            prompt = gr.Text(
+                label="Prompt",
+                show_label=False,
+                max_lines=2,
+                placeholder="Enter your prompt",
+                container=False,
+            )
+        with gr.Row():
+            image_url = gr.Text(
+                label="Image URL",
+                show_label=False,
+                max_lines=1,
+                placeholder="Enter your image URL",
+                container=False,
+            )
+        with gr.Row():
+            run_button = gr.Button("Run", scale=0)
+        result = gr.Textbox(
+            show_label=False
+        )
+        gr.Examples(
+            examples=examples,
+            inputs=[image_url, prompt]
+        )
+    gr.on(
+        triggers=[run_button.click, image_url.submit, prompt.submit],
+        fn=infer,
+        inputs=[image_url, prompt],
+        outputs=[result]
+    )
+demo.queue().launch()