# Acknowledgement: https://huggingface.co/spaces/kadirnar/Yolov10/blob/main/app.py
# Thanks to @kadirnar

import shutil
import tempfile

import cv2
import gradio as gr
from ultralytics import YOLOv10


def yolov10_inference(image, model_path, image_size, conf_threshold):
    """Run YOLOv10 detection on a single image.

    Args:
        image: Input image (PIL image from the Gradio ``Image`` widget).
        model_path: Checkpoint name/path understood by ``YOLOv10``.
        image_size: Inference resolution (``imgsz``) passed to the model.
        conf_threshold: Minimum confidence for a detection to be kept.

    Returns:
        Tuple ``(annotated_rgb_array, None)`` — the ``None`` fills the
        video slot of the shared Gradio outputs.
    """
    model = YOLOv10(model_path)
    results = model.predict(source=image, imgsz=image_size, conf=conf_threshold)
    annotated_image = results[0].plot()
    # plot() returns a BGR array; reverse the channel axis to RGB for Gradio.
    return annotated_image[:, :, ::-1], None


def yolov10_inference_video(video, model_path, image_size, conf_threshold):
    """Run YOLOv10 detection on every frame of a video.

    Args:
        video: Filesystem path of the uploaded video (from the Gradio
            ``Video`` widget).
        model_path: Checkpoint name/path understood by ``YOLOv10``.
        image_size: Inference resolution (``imgsz``) passed to the model.
        conf_threshold: Minimum confidence for a detection to be kept.

    Returns:
        Tuple ``(None, annotated_video_path)`` — the ``None`` fills the
        image slot of the shared Gradio outputs.
    """
    model = YOLOv10(model_path)

    # Work on a private copy so the original upload is never touched.
    # NamedTemporaryFile(delete=False) replaces the deprecated, race-prone
    # tempfile.mktemp; shutil.copyfile replaces the manual read/write loop.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_in:
        video_path = tmp_in.name
    shutil.copyfile(video, video_path)

    cap = cv2.VideoCapture(video_path)
    try:
        # Some containers report 0 fps; fall back to a sane default so the
        # output file stays playable.
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_out:
            output_video_path = tmp_out.name
        out = cv2.VideoWriter(
            output_video_path,
            cv2.VideoWriter_fourcc(*"mp4v"),
            fps,
            (frame_width, frame_height),
        )
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                results = model.predict(
                    source=frame, imgsz=image_size, conf=conf_threshold
                )
                # plot() yields BGR, which is what VideoWriter expects.
                out.write(results[0].plot())
        finally:
            out.release()
    finally:
        cap.release()

    return None, output_video_path


def app():
    """Build the Gradio UI: inputs on the left, annotated outputs on the right."""
    with gr.Blocks():
        with gr.Row():
            with gr.Column():
                input_type = gr.Radio(
                    choices=["Image", "Video"],
                    value="Image",
                    label="Input Type",
                )
                image = gr.Image(type="pil", label="Image", visible=True)
                video = gr.Video(label="Video", visible=False)
                model_id = gr.Dropdown(
                    label="Model",
                    choices=[
                        "yolov10n.pt",
                        "yolov10s.pt",
                        "yolov10m.pt",
                        "yolov10b.pt",
                        "yolov10l.pt",
                        "yolov10x.pt",
                    ],
                    value="yolov10s.pt",
                )
                image_size = gr.Slider(
                    label="Image Size",
                    minimum=320,
                    maximum=1280,
                    step=32,
                    value=640,
                )
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.25,
                )
                yolov10_infer = gr.Button(value="Detect Objects")

            with gr.Column():
                output_image = gr.Image(type="numpy", label="Annotated Image", visible=True)
                output_video = gr.Video(label="Annotated Video", visible=False)

        def update_visibility(input_type):
            # Show exactly one input widget and its matching output widget.
            image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
            video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)
            output_image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
            output_video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)
            return image, video, output_image, output_video

        input_type.change(
            fn=update_visibility,
            inputs=[input_type],
            outputs=[image, video, output_image, output_video],
        )

        def run_inference(image, video, model_id, image_size, conf_threshold, input_type):
            # Dispatch to the image or video pipeline based on the radio value.
            if input_type == "Image":
                return yolov10_inference(image, model_id, image_size, conf_threshold)
            else:
                return yolov10_inference_video(video, model_id, image_size, conf_threshold)

        yolov10_infer.click(
            fn=run_inference,
            inputs=[image, video, model_id, image_size, conf_threshold, input_type],
            outputs=[output_image, output_video],
        )

        gr.Examples(
            examples=[
                [
                    "ultralytics/assets/bus.jpg",
                    "yolov10s.pt",
                    640,
                    0.25,
                ],
                [
                    "ultralytics/assets/zidane.jpg",
                    "yolov10s.pt",
                    640,
                    0.25,
                ],
            ],
            fn=yolov10_inference,
            inputs=[
                image,
                model_id,
                image_size,
                conf_threshold,
            ],
            outputs=[output_image],
            cache_examples=True,
        )


gradio_app = gr.Blocks()
with gradio_app:
    gr.HTML(
        """
        <h1 style="text-align: center;">
        YOLOv10: Real-Time End-to-End Object Detection
        </h1>
        """
    )
    with gr.Row():
        with gr.Column():
            app()

gradio_app.launch(debug=True)