diff --git a/docs/quickstart.md b/docs/quickstart.md
index 3baed00d..0d7f71a0 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -118,6 +118,17 @@ Ultralytics provides various installation methods including pip, conda, and Docker.
See the `ultralytics` [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) file for a list of dependencies. Note that all examples above install all required dependencies.
+
+ Watch: Ultralytics YOLO for Object Detection: Quickstart Guide for Installation and Setup.
+
!!! tip "Tip"
PyTorch requirements vary by operating system and CUDA version, so it's recommended to install PyTorch first by following the instructions at [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally).
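As a quick aid to the quickstart changes above, here is a minimal post-install sanity check. This is a sketch, assuming a standard `pip install ultralytics` has completed; it uses the package-level `ultralytics.checks()` helper together with PyTorch's CUDA query:

```python
# Minimal sketch of verifying the installation described in quickstart.md.
# Assumes `pip install ultralytics` (and PyTorch, per the tip above) succeeded.
import torch
import ultralytics

ultralytics.checks()  # prints Ultralytics version, Python/torch versions, and hardware info
print('CUDA available:', torch.cuda.is_available())  # confirms the PyTorch/CUDA pairing
```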
diff --git a/ultralytics/models/sam/model.py b/ultralytics/models/sam/model.py
index 40871044..68acd22f 100644
--- a/ultralytics/models/sam/model.py
+++ b/ultralytics/models/sam/model.py
@@ -61,15 +61,14 @@ class SAM(Model):
Performs segmentation prediction on the given image or video source.
Args:
- source: Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
+ source (str | PIL.Image | np.ndarray): Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
stream (bool, optional): If True, enables real-time streaming. Defaults to False.
bboxes (list, optional): List of bounding box coordinates for prompted segmentation. Defaults to None.
points (list, optional): List of points for prompted segmentation. Defaults to None.
labels (list, optional): List of labels for prompted segmentation. Defaults to None.
- **kwargs: Additional keyword arguments.
Returns:
- The segmentation masks.
+ (list): The model predictions.
"""
overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024)
kwargs.update(overrides)
@@ -81,15 +80,14 @@ class SAM(Model):
Alias for the 'predict' method.
Args:
- source: Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
+ source (str | PIL.Image | np.ndarray): Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
stream (bool, optional): If True, enables real-time streaming. Defaults to False.
bboxes (list, optional): List of bounding box coordinates for prompted segmentation. Defaults to None.
points (list, optional): List of points for prompted segmentation. Defaults to None.
labels (list, optional): List of labels for prompted segmentation. Defaults to None.
- **kwargs: Additional keyword arguments.
Returns:
- The segmentation masks.
+ (list): The model predictions.
"""
return self.predict(source, stream, bboxes, points, labels, **kwargs)
@@ -112,6 +110,6 @@ class SAM(Model):
Provides a mapping from the 'segment' task to its corresponding 'Predictor'.
Returns:
- dict: A dictionary mapping the 'segment' task to its corresponding 'Predictor'.
+ (dict): A dictionary mapping the 'segment' task to its corresponding 'Predictor'.
"""
return {'segment': {'predictor': Predictor}}
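For context on the `predict`/`__call__` docstrings updated above, a short usage sketch of prompted segmentation; the checkpoint name, image path, and prompt coordinates are illustrative, not taken from this diff:

```python
from ultralytics import SAM

# 'sam_b.pt' is an illustrative checkpoint name.
model = SAM('sam_b.pt')

# Box prompt: [x1, y1, x2, y2]; returns a list of predictions, as documented.
results = model.predict('image.jpg', bboxes=[100, 100, 300, 300])

# Point prompt: one foreground point (label 1); __call__ aliases predict.
results = model('image.jpg', points=[150, 150], labels=[1])
print(len(results))
```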
diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py
index 8dce2be7..94362ecc 100644
--- a/ultralytics/models/sam/predict.py
+++ b/ultralytics/models/sam/predict.py
@@ -77,7 +77,7 @@ class Predictor(BasePredictor):
im (torch.Tensor | List[np.ndarray]): BCHW tensor format or list of HWC numpy arrays.
Returns:
- torch.Tensor: The preprocessed image tensor.
+ (torch.Tensor): The preprocessed image tensor.
"""
if self.im is not None:
return self.im
@@ -105,7 +105,7 @@ class Predictor(BasePredictor):
im (List[np.ndarray]): List containing images in HWC numpy array format.
Returns:
- List[np.ndarray]: List of transformed images.
+ (List[np.ndarray]): List of transformed images.
"""
assert len(im) == 1, 'SAM model does not currently support batched inference'
letterbox = LetterBox(self.args.imgsz, auto=False, center=False)
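A standalone sketch of the `pre_transform` behavior shown in this hunk, assuming the `LetterBox` import used by `predict.py` and an illustrative input shape:

```python
import numpy as np
from ultralytics.data.augment import LetterBox

# Mirror SAM's pre_transform: fit a single HWC image into 1024x1024,
# with no auto stride rounding and bottom-right padding (center=False).
letterbox = LetterBox(1024, auto=False, center=False)
img = np.zeros((480, 640, 3), dtype=np.uint8)  # illustrative HWC input
out = letterbox(image=img)
print(out.shape)  # expected (1024, 1024, 3)
```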
@@ -126,7 +126,7 @@ class Predictor(BasePredictor):
multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts. Defaults to False.
Returns:
- tuple: Contains the following three elements.
+ (tuple): Contains the following three elements.
- np.ndarray: The output masks in shape CxHxW, where C is the number of generated masks.
- np.ndarray: An array of length C containing quality scores predicted by the model for each mask.
- np.ndarray: Low-resolution logits of shape CxHxW for subsequent inference, where H=W=256.
@@ -155,7 +155,7 @@ class Predictor(BasePredictor):
multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts. Defaults to False.
Returns:
- tuple: Contains the following three elements.
+ (tuple): Contains the following three elements.
- np.ndarray: The output masks in shape CxHxW, where C is the number of generated masks.
- np.ndarray: An array of length C containing quality scores predicted by the model for each mask.
- np.ndarray: Low-resolution logits of shape CxHxW for subsequent inference, where H=W=256.
@@ -234,7 +234,7 @@ class Predictor(BasePredictor):
crop_nms_thresh (float): IoU cutoff for Non-Maximum Suppression (NMS) to remove duplicate masks between crops.
Returns:
- tuple: A tuple containing segmented masks, confidence scores, and bounding boxes.
+ (tuple): A tuple containing segmented masks, confidence scores, and bounding boxes.
"""
self.segment_all = True
ih, iw = im.shape[2:]
@@ -434,9 +434,9 @@ class Predictor(BasePredictor):
nms_thresh (float): The IoU threshold for the NMS algorithm. Defaults to 0.7.
Returns:
- T(uple[torch.Tensor, List[int]]):
- - new_masks (torch.Tensor): The processed masks with small regions removed. Shape is (N, H, W).
- - keep (List[int]): The indices of the remaining masks post-NMS, which can be used to filter the boxes.
+ (Tuple[torch.Tensor, List[int]]):
+ - new_masks (torch.Tensor): The processed masks with small regions removed. Shape is (N, H, W).
+ - keep (List[int]): The indices of the remaining masks post-NMS, which can be used to filter the boxes.
"""
if len(masks) == 0:
return masks
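Finally, a sketch of calling the `remove_small_regions` static method whose docstring is fixed above; the mask contents and thresholds are illustrative:

```python
import torch
from ultralytics.models.sam.predict import Predictor

# Three illustrative binary masks at 256x256.
masks = torch.zeros(3, 256, 256, dtype=torch.bool)
masks[0, 50:200, 50:200] = True
masks[1, 60:210, 60:210] = True  # overlaps mask 0 heavily, so NMS should drop one of the pair
masks[2, 10:20, 10:20] = True    # tiny region, zeroed when its area falls below min_area

new_masks, keep = Predictor.remove_small_regions(masks, min_area=500, nms_thresh=0.7)
print(new_masks.shape, keep)  # processed masks plus the indices surviving NMS
```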