Add NNCF support in OpenVINO export (#4671)

Co-authored-by: 下北泽miHoMo红茶坊 <39751846+kisaragychihaya@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-07-05 20:34:21 +08:00 · 2023-08-31 15:19:44 +02:00 · 2023-08-31 15:19:44 +02:00 · 45ba99973d
commit 45ba99973d
parent d2cf7acce0
4 changed files with 72 additions and 45 deletions
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@ -8,18 +8,25 @@ on:
    branches: [main]
  workflow_dispatch:
    inputs:
-      dockerfile:
+      Dockerfile:
-        type: choice
+        type: boolean
-        description: Select Dockerfile
+        description: Use Dockerfile
-        options:
+        default: true
-          - Dockerfile-arm64
+      Dockerfile-cpu:
-          - Dockerfile-jetson
+        type: boolean
-          - Dockerfile-python
+        description: Use Dockerfile-cpu
-          - Dockerfile-cpu
+      Dockerfile-arm64:
-          - Dockerfile
+        type: boolean
        description: Use Dockerfile-arm64
      Dockerfile-jetson:
        type: boolean
        description: Use Dockerfile-jetson
      Dockerfile-python:
        type: boolean
        description: Use Dockerfile-python
      push:
        type: boolean
-        description: Push image to Docker Hub
+        description: Push images to Docker Hub
        default: true
 jobs:
@ -93,28 +100,28 @@ jobs:
          VERSION_TAG: ${{ steps.get_version.outputs.version_tag }}
      - name: Build Image
-        if: github.event_name == 'push' || github.event.inputs.dockerfile == matrix.dockerfile
+        if: github.event_name == 'push' || github.event.inputs[matrix.dockerfile] == 'true'
        run: |
          docker build --platform ${{ matrix.platforms }} -f docker/${{ matrix.dockerfile }} \
          -t ultralytics/ultralytics:${{ matrix.tags }} \
          -t ultralytics/ultralytics:${{ steps.get_version.outputs.version_tag }} .
      - name: Run Tests
-        if: (github.event_name == 'push' || github.event.inputs.dockerfile == matrix.dockerfile) && matrix.platforms == 'linux/amd64'  # arm64 images not supported on GitHub CI runners
+        if: (github.event_name == 'push' || github.event.inputs[matrix.dockerfile] == 'true') && matrix.platforms == 'linux/amd64'  # arm64 images not supported on GitHub CI runners
        run: docker run ultralytics/ultralytics:${{ matrix.tags }} /bin/bash -c "pip install pytest && pytest tests"
      - name: Run Benchmarks
        # WARNING: Dockerfile (GPU) error on TF.js export 'module 'numpy' has no attribute 'object'.
-        if: (github.event_name == 'push' || github.event.inputs.dockerfile == matrix.dockerfile) && matrix.platforms == 'linux/amd64' && matrix.dockerfile != 'Dockerfile'  # arm64 images not supported on GitHub CI runners
+        if: (github.event_name == 'push' || github.event.inputs[matrix.dockerfile] == 'true') && matrix.platforms == 'linux/amd64' && matrix.dockerfile != 'Dockerfile'  # arm64 images not supported on GitHub CI runners
        run: docker run ultralytics/ultralytics:${{ matrix.tags }} yolo benchmark model=yolov8n.pt imgsz=160 verbose=0.26
      - name: Push Docker Image with Ultralytics version tag
-        if: (github.event_name == 'push' || (github.event.inputs.dockerfile == matrix.dockerfile && github.event.inputs.push == 'true')) && steps.check_tag.outputs.exists == 'false'
+        if: (github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true')) && steps.check_tag.outputs.exists == 'false'
        run: |
          docker push ultralytics/ultralytics:${{ steps.get_version.outputs.version_tag }}
      - name: Push Docker Image with latest tag
-        if: github.event_name == 'push' || (github.event.inputs.dockerfile == matrix.dockerfile && github.event.inputs.push == 'true')
+        if: github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true')
        run: |
          docker push ultralytics/ultralytics:${{ matrix.tags }}
          if [[ "${{ matrix.tags }}" == "latest" ]]; then
--- a/docker/Dockerfile-cpu
+++ b/docker/Dockerfile-cpu
@ -18,8 +18,8 @@ RUN apt update \
 WORKDIR /usr/src/ultralytics
 # Copy contents
-# COPY . /usr/src/app  (issues as not a .git directory)
+COPY . /usr/src/ultralytics
-RUN git clone https://github.com/ultralytics/ultralytics /usr/src/ultralytics
+# RUN git clone https://github.com/ultralytics/ultralytics /usr/src/ultralytics
 ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt /usr/src/ultralytics/
 # Remove python3.11/EXTERNALLY-MANAGED or use 'pip install --break-system-packages' avoid 'externally-managed-environment' Ubuntu nightly error
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@ -65,12 +65,7 @@ class Compose:
    def __repr__(self):
        """Return string representation of object."""
-        format_string = f'{self.__class__.__name__}('
+        return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"
        for t in self.transforms:
            format_string += '\n'
            format_string += f'    {t}'
        format_string += '\n)'
        return format_string
 class BaseMixTransform:
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@ -58,9 +58,12 @@ from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
 import numpy as np
 import torch
 from ultralytics.cfg import get_cfg
 from ultralytics.data.dataset import YOLODataset
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.nn.autobackend import check_class_names
 from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder
 from ultralytics.nn.tasks import DetectionModel, SegmentationModel
@ -275,10 +278,11 @@ class Exporter:
                                  f"work. Use export 'imgsz={max(self.imgsz)}' if val is required."
            imgsz = self.imgsz[0] if square else str(self.imgsz)[1:-1].replace(' ', '')
            predict_data = f'data={data}' if model.task == 'segment' and format == 'pb' else ''
            q = 'int8' if self.args.int8 else 'half' if self.args.half else ''  # quantization
            LOGGER.info(f'\nExport complete ({time.time() - t:.1f}s)'
                        f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
-                        f'\nPredict:         yolo predict task={model.task} model={f} imgsz={imgsz} {predict_data}'
+                        f'\nPredict:         yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}'
-                        f'\nValidate:        yolo val task={model.task} model={f} imgsz={imgsz} data={data} {s}'
+                        f'\nValidate:        yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}'
                        f'\nVisualize:       https://netron.app')
        self.run_callbacks('on_export_end')
@ -367,27 +371,54 @@ class Exporter:
        LOGGER.info(f'\n{prefix} starting export with openvino {ov.__version__}...')
        f = str(self.file).replace(self.file.suffix, f'_openvino_model{os.sep}')
        fq = str(self.file).replace(self.file.suffix, f'_int8_openvino_model{os.sep}')
        f_onnx = self.file.with_suffix('.onnx')
        f_ov = str(Path(f) / self.file.with_suffix('.xml').name)
        fq_ov = str(Path(fq) / self.file.with_suffix('.xml').name)
        def serialize(ov_model, file):
            """Set RT info, serialize and save metadata YAML."""
            ov_model.set_rt_info('YOLOv8', ['model_info', 'model_type'])
            ov_model.set_rt_info(True, ['model_info', 'reverse_input_channels'])
            ov_model.set_rt_info(114, ['model_info', 'pad_value'])
            ov_model.set_rt_info([255.0], ['model_info', 'scale_values'])
            ov_model.set_rt_info(self.args.iou, ['model_info', 'iou_threshold'])
            ov_model.set_rt_info([v.replace(' ', '_') for v in self.model.names.values()], ['model_info', 'labels'])
            if self.model.task != 'classify':
                ov_model.set_rt_info('fit_to_window_letterbox', ['model_info', 'resize_type'])
            ov.serialize(ov_model, file)  # save
            yaml_save(Path(file).parent / 'metadata.yaml', self.metadata)  # add metadata.yaml
        ov_model = mo.convert_model(f_onnx,
                                    model_name=self.pretty_name,
                                    framework='onnx',
                                    compress_to_fp16=self.args.half)  # export
-        # Set RT info
+        if self.args.int8:
-        ov_model.set_rt_info('YOLOv8', ['model_info', 'model_type'])
+            assert self.args.data, "INT8 export requires a data argument for calibration, i.e. 'data=coco8.yaml'"
-        ov_model.set_rt_info(True, ['model_info', 'reverse_input_channels'])
+            check_requirements('nncf>=2.5.0')
-        ov_model.set_rt_info(114, ['model_info', 'pad_value'])
+            import nncf
        ov_model.set_rt_info([255.0], ['model_info', 'scale_values'])
        ov_model.set_rt_info(self.args.iou, ['model_info', 'iou_threshold'])
        ov_model.set_rt_info([v.replace(' ', '_') for k, v in sorted(self.model.names.items())],
                             ['model_info', 'labels'])
        if self.model.task != 'classify':
            ov_model.set_rt_info('fit_to_window_letterbox', ['model_info', 'resize_type'])
-        ov.serialize(ov_model, f_ov)  # save
+            def transform_fn(data_item):
-        yaml_save(Path(f) / 'metadata.yaml', self.metadata)  # add metadata.yaml
+                """Quantization transform function."""
                im = data_item['img'].numpy().astype(np.float32) / 255.0  # uint8 to fp16/32 and 0 - 255 to 0.0 - 1.0
                return np.expand_dims(im, 0) if im.ndim == 3 else im
            # Generate calibration data for integer quantization
            LOGGER.info(f"{prefix} collecting INT8 calibration images from 'data={self.args.data}'")
            data = check_det_dataset(self.args.data)
            dataset = YOLODataset(data['val'], data=data, imgsz=self.imgsz[0], augment=False)
            quantization_dataset = nncf.Dataset(dataset, transform_fn)
            ignored_scope = nncf.IgnoredScope(types=['Multiply', 'Subtract', 'Sigmoid'])  # ignore operation
            quantized_ov_model = nncf.quantize(ov_model,
                                               quantization_dataset,
                                               preset=nncf.QuantizationPreset.MIXED,
                                               ignored_scope=ignored_scope)
            serialize(quantized_ov_model, fq_ov)
            return fq, None
        serialize(ov_model, f_ov)
        return f, None
    @try_export
@ -633,19 +664,13 @@ class Exporter:
        if self.args.int8:
            verbosity = '--verbosity info'
            if self.args.data:
                import numpy as np
                from ultralytics.data.dataset import YOLODataset
                from ultralytics.data.utils import check_det_dataset
                # Generate calibration data for integer quantization
                LOGGER.info(f"{prefix} collecting INT8 calibration images from 'data={self.args.data}'")
                data = check_det_dataset(self.args.data)
                dataset = YOLODataset(data['val'], data=data, imgsz=self.imgsz[0], augment=False)
                images = []
-                n_images = 100  # maximum number of images
+                for i, batch in enumerate(dataset):
-                for n, batch in enumerate(dataset):
+                    if i >= 100:  # maximum number of calibration images
                    if n >= n_images:
                        break
                    im = batch['img'].permute(1, 2, 0)[None]  # list to nparray, CHW to BHWC
                    images.append(im)