diff --git a/docs/quickstart.md b/docs/quickstart.md
index 3baed00d..0d7f71a0 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -118,6 +118,17 @@ Ultralytics provides various installation methods including pip, conda, and Dock
 
 See the `ultralytics` [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) file for a list of dependencies. Note that all examples above install all required dependencies.
 
+<!-- Embedded tutorial video -->
+Watch: Ultralytics YOLO for Object Detection: Quickstart Guide for Installation and Setup.
+
 !!! tip "Tip"
 
     PyTorch requirements vary by operating system and CUDA requirements, so it's recommended to install PyTorch first following instructions at [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally).

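The docs hunk above covers installation; a quick sanity check of the resulting environment can be scripted. A minimal sketch, assuming `pip install ultralytics` has completed and that the package's public `checks()` helper is available (as in upstream `ultralytics/__init__.py`):

```python
# Post-install sanity check for the quickstart flow documented above.
# Assumes `pip install ultralytics` has already succeeded; per the tip in
# the hunk, install PyTorch first via https://pytorch.org/get-started/locally.
import ultralytics

ultralytics.checks()  # prints a summary of Python, torch, CUDA and hardware info
```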
diff --git a/ultralytics/models/sam/model.py b/ultralytics/models/sam/model.py
index 40871044..68acd22f 100644
--- a/ultralytics/models/sam/model.py
+++ b/ultralytics/models/sam/model.py
@@ -61,15 +61,14 @@ class SAM(Model):
         Performs segmentation prediction on the given image or video source.
 
         Args:
-            source: Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
+            source (str): Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
             stream (bool, optional): If True, enables real-time streaming. Defaults to False.
             bboxes (list, optional): List of bounding box coordinates for prompted segmentation. Defaults to None.
             points (list, optional): List of points for prompted segmentation. Defaults to None.
             labels (list, optional): List of labels for prompted segmentation. Defaults to None.
-            **kwargs: Additional keyword arguments.
 
         Returns:
-            The segmentation masks.
+            (list): The model predictions.
         """
         overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024)
         kwargs.update(overrides)
@@ -81,15 +80,14 @@ class SAM(Model):
         Alias for the 'predict' method.
 
         Args:
-            source: Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
+            source (str): Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
             stream (bool, optional): If True, enables real-time streaming. Defaults to False.
             bboxes (list, optional): List of bounding box coordinates for prompted segmentation. Defaults to None.
             points (list, optional): List of points for prompted segmentation. Defaults to None.
             labels (list, optional): List of labels for prompted segmentation. Defaults to None.
-            **kwargs: Additional keyword arguments.
 
         Returns:
-            The segmentation masks.
+            (list): The model predictions.
         """
         return self.predict(source, stream, bboxes, points, labels, **kwargs)
 
@@ -112,6 +110,6 @@ class SAM(Model):
         Provides a mapping from the 'segment' task to its corresponding 'Predictor'.
 
         Returns:
-            dict: A dictionary mapping the 'segment' task to its corresponding 'Predictor'.
+            (dict): A dictionary mapping the 'segment' task to its corresponding 'Predictor'.
         """
         return {'segment': {'predictor': Predictor}}
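The hunks above tighten the documented types of SAM's prompt API, so a short usage sketch may help check them against real calls. Illustrative only: the checkpoint name `sam_b.pt` and the image path are assumptions, not fixed by this diff:

```python
# Illustrative use of the SAM prompt API whose docstrings change above.
# 'sam_b.pt' and 'image.jpg' are placeholder names.
from ultralytics import SAM

model = SAM('sam_b.pt')

# `source` is a path (or PIL.Image/np.ndarray); bboxes/points/labels are
# optional prompts. Per the updated Returns section, predict() yields a
# list of model predictions.
results = model.predict('image.jpg', bboxes=[100, 100, 400, 400])
results = model.predict('image.jpg', points=[250, 250], labels=[1])
```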
diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py
index 8dce2be7..94362ecc 100644
--- a/ultralytics/models/sam/predict.py
+++ b/ultralytics/models/sam/predict.py
@@ -77,7 +77,7 @@ class Predictor(BasePredictor):
             im (torch.Tensor | List[np.ndarray]): BCHW tensor format or list of HWC numpy arrays.
 
         Returns:
-            torch.Tensor: The preprocessed image tensor.
+            (torch.Tensor): The preprocessed image tensor.
         """
         if self.im is not None:
             return self.im
@@ -105,7 +105,7 @@ class Predictor(BasePredictor):
             im (List[np.ndarray]): List containing images in HWC numpy array format.
 
         Returns:
-            List[np.ndarray]: List of transformed images.
+            (List[np.ndarray]): List of transformed images.
         """
         assert len(im) == 1, 'SAM model does not currently support batched inference'
         letterbox = LetterBox(self.args.imgsz, auto=False, center=False)
@@ -126,7 +126,7 @@ class Predictor(BasePredictor):
             multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts. Defaults to False.
 
         Returns:
-            tuple: Contains the following three elements.
+            (tuple): Contains the following three elements.
                 - np.ndarray: The output masks in shape CxHxW, where C is the number of generated masks.
                 - np.ndarray: An array of length C containing quality scores predicted by the model for each mask.
                 - np.ndarray: Low-resolution logits of shape CxHxW for subsequent inference, where H=W=256.
@@ -155,7 +155,7 @@ class Predictor(BasePredictor):
             multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts. Defaults to False.
 
         Returns:
-            tuple: Contains the following three elements.
+            (tuple): Contains the following three elements.
                 - np.ndarray: The output masks in shape CxHxW, where C is the number of generated masks.
                 - np.ndarray: An array of length C containing quality scores predicted by the model for each mask.
                 - np.ndarray: Low-resolution logits of shape CxHxW for subsequent inference, where H=W=256.
@@ -234,7 +234,7 @@ class Predictor(BasePredictor):
             crop_nms_thresh (float): IoU cutoff for Non-Maximum Suppression (NMS) to remove duplicate masks between crops.
 
         Returns:
-            tuple: A tuple containing segmented masks, confidence scores, and bounding boxes.
+            (tuple): A tuple containing segmented masks, confidence scores, and bounding boxes.
         """
         self.segment_all = True
         ih, iw = im.shape[2:]
@@ -434,9 +434,9 @@ class Predictor(BasePredictor):
             nms_thresh (float): The IoU threshold for the NMS algorithm. Defaults to 0.7.
 
         Returns:
-            T(uple[torch.Tensor, List[int]]):
-                - new_masks (torch.Tensor): The processed masks with small regions removed. Shape is (N, H, W).
-                - keep (List[int]): The indices of the remaining masks post-NMS, which can be used to filter the boxes.
+            (tuple([torch.Tensor, List[int]])):
+                - new_masks (torch.Tensor): The processed masks with small regions removed. Shape is (N, H, W).
+                - keep (List[int]): The indices of the remaining masks post-NMS, which can be used to filter the boxes.
         """
         if len(masks) == 0:
             return masks
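The last hunk documents `remove_small_regions()` returning `(new_masks, keep)`. A brief sketch of consuming that tuple, assuming the helper is callable as a static method as in the upstream source; the tensors and thresholds here are synthetic stand-ins:

```python
# Sketch of consuming the (new_masks, keep) tuple documented in the last hunk.
# Masks, boxes, and thresholds below are illustrative, not from this diff.
import torch

from ultralytics.models.sam.predict import Predictor

masks = torch.zeros((2, 256, 256))
masks[0, 40:160, 40:160] = 1.0  # one large square region
masks[1, 50:170, 50:170] = 1.0  # a second, overlapping region

new_masks, keep = Predictor.remove_small_regions(masks, min_area=100, nms_thresh=0.7)

boxes = torch.tensor([[40, 40, 160, 160], [50, 50, 170, 170]])
filtered = boxes[keep]  # `keep` indexes the masks (and matching boxes) surviving NMS
```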