diff --git a/.codecov.yml b/.codecov.yml
new file mode 100644
index 00000000..d580401b
--- /dev/null
+++ b/.codecov.yml
@@ -0,0 +1,26 @@
+codecov:
+ notify:
+ require_ci_to_pass: yes
+
+coverage:
+ precision: 2
+ round: down
+ range: "30...100"
+
+ status:
+ project: yes
+ patch: yes
+ changes: no
+
+parsers:
+ gcov:
+ branch_detection:
+ conditional: yes
+ loop: yes
+ method: no
+ macro: no
+
+comment:
+ layout: "header, diff"
+ behavior: default
+ require_changes: no
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000..50cd06f5
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+github: OlafenwaMoses
\ No newline at end of file
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 00000000..5d70f649
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,232 @@
+name: Build and Testing
+
+on:
+ push:
+ branches: [master]
+ pull_request:
+ branches: [master]
+
+jobs:
+ UnitestPython37:
+
+ name: Python3.7 Tests
+ runs-on: ubuntu-latest
+ # needs: None
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.7'
+ cache: 'pip'
+ - name: Install Dependencies
+ run: |
+ pip install -r requirements.txt
+ pip install -r requirements_extra.txt
+ - name: Download and Setup Resources
+ env:
+ CI: false
+ run: |
+ sudo apt-get update
+ sudo apt-get install unzip -y
+
+ mkdir test/data-models
+ mkdir test/data-json
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-a639ec97.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-idenprof-test_acc_0.82550_epoch-95.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3-idenprof-test_acc_0.81050_epoch-92.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3_google-1a9a5a14.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-b0353104.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-idenprof-test_acc_0.85300_epoch-92.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-19c8e357.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-idenprof-test_acc_0.78200_epoch-91.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny-yolov3.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny_yolov3_number-plate-dataset-imageai_mAP-0.22595_epoch-20.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt -P test/data-models
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/number-plate-dataset-imageai_tiny_yolov3_detection_config.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/number-plate-dataset-imageai_yolov3_detection_config.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof_model_classes.json -P test/data-json
+
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-datasets.zip -P test
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-images.zip -P test
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-videos.zip -P test
+
+ unzip test/data-datasets.zip -d test
+ unzip test/data-images.zip -d test
+ unzip test/data-videos.zip -d test
+ - name: Run Unittest
+ run: |
+ pytest test -vvv
+ UnitestPython38:
+
+ name: Python3.8 Tests
+ runs-on: ubuntu-latest
+ # needs: None
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.8'
+ cache: 'pip'
+ - name: Install Dependencies
+ run: |
+ pip install -r requirements.txt
+ pip install -r requirements_extra.txt
+ - name: Download and Setup Resources
+ env:
+ CI: false
+ run: |
+ sudo apt-get update
+ sudo apt-get install unzip -y
+
+ mkdir test/data-models
+ mkdir test/data-json
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-a639ec97.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-idenprof-test_acc_0.82550_epoch-95.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3-idenprof-test_acc_0.81050_epoch-92.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3_google-1a9a5a14.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-b0353104.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-idenprof-test_acc_0.85300_epoch-92.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-19c8e357.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-idenprof-test_acc_0.78200_epoch-91.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny-yolov3.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny_yolov3_number-plate-dataset-imageai_mAP-0.22595_epoch-20.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt -P test/data-models
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/number-plate-dataset-imageai_tiny_yolov3_detection_config.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/number-plate-dataset-imageai_yolov3_detection_config.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof_model_classes.json -P test/data-json
+
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-datasets.zip -P test
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-images.zip -P test
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-videos.zip -P test
+
+ unzip test/data-datasets.zip -d test
+ unzip test/data-images.zip -d test
+ unzip test/data-videos.zip -d test
+ - name: Run Unittest
+ run: |
+ pytest test -vvv
+
+ UnitestPython39:
+
+ name: Python3.9 Tests
+ runs-on: ubuntu-latest
+ # needs: None
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.9'
+ cache: 'pip'
+ - name: Install Dependencies
+ run: |
+ pip install -r requirements.txt
+ pip install -r requirements_extra.txt
+ - name: Download and Setup Resources
+ env:
+ CI: false
+ run: |
+ sudo apt-get update
+ sudo apt-get install unzip -y
+
+ mkdir test/data-models
+ mkdir test/data-json
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-a639ec97.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-idenprof-test_acc_0.82550_epoch-95.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3-idenprof-test_acc_0.81050_epoch-92.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3_google-1a9a5a14.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-b0353104.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-idenprof-test_acc_0.85300_epoch-92.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-19c8e357.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-idenprof-test_acc_0.78200_epoch-91.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny-yolov3.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny_yolov3_number-plate-dataset-imageai_mAP-0.22595_epoch-20.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt -P test/data-models
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/number-plate-dataset-imageai_tiny_yolov3_detection_config.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/number-plate-dataset-imageai_yolov3_detection_config.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof_model_classes.json -P test/data-json
+
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-datasets.zip -P test
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-images.zip -P test
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-videos.zip -P test
+
+ unzip test/data-datasets.zip -d test
+ unzip test/data-images.zip -d test
+ unzip test/data-videos.zip -d test
+ - name: Run Unittest
+ run: |
+ pytest test -vvv
+
+ UnitestPython310:
+
+ name: Python3.10 Tests
+ runs-on: ubuntu-latest
+ # needs: None
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+ cache: 'pip'
+ - name: Install Dependencies
+ run: |
+ pip install -r requirements.txt
+ pip install -r requirements_extra.txt
+ - name: Download and Setup Resources
+ env:
+ CI: false
+ run: |
+ sudo apt-get update
+ sudo apt-get install unzip -y
+
+ mkdir test/data-models
+ mkdir test/data-json
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-a639ec97.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-idenprof-test_acc_0.82550_epoch-95.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3-idenprof-test_acc_0.81050_epoch-92.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3_google-1a9a5a14.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-b0353104.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-idenprof-test_acc_0.85300_epoch-92.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-19c8e357.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-idenprof-test_acc_0.78200_epoch-91.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny-yolov3.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny_yolov3_number-plate-dataset-imageai_mAP-0.22595_epoch-20.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt -P test/data-models
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt -P test/data-models
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/number-plate-dataset-imageai_tiny_yolov3_detection_config.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/number-plate-dataset-imageai_yolov3_detection_config.json -P test/data-json
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof_model_classes.json -P test/data-json
+
+
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-datasets.zip -P test
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-images.zip -P test
+ wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-videos.zip -P test
+
+ unzip test/data-datasets.zip -d test
+ unzip test/data-images.zip -d test
+ unzip test/data-videos.zip -d test
+ - name: Run Unittest
+ run: |
+ pytest test -vvv
+
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..16d09085
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,169 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+
+# Other files and folders
+test/data-models
+test/data-images
+test/data-json
+test/data-videos
+test/data-datasets
+experiment
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..4187bad0
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,33 @@
+dist: xenial
+sudo: required
+language: python
+python:
+ - '3.7.6'
+install:
+ - pip install -r requirements.txt
+ - pip install pytest
+ - pip install pytest-cov
+script:
+ - python setup.py install
+ - cd test
+ - mkdir data-models
+ - mkdir data-temp
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/DenseNet-BC-121-32.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/inception_v3_weights_tf_dim_ordering_tf_kernels.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_imagenet_tf.2.0.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/mobilenet_v2.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/models-v3/idenprof_densenet-0.763500.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/models-v3/idenprof_full_resnet_ex-001_acc-0.119792.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/idenprof_resnet_ex-056_acc-0.993062.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_coco_best_v2.1.0.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo-tiny.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/pretrained-yolov3.h5
+ - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5
+ - pytest -v --cov
+after_script:
+ - bash <(curl -s https://codecov.io/bash)
+
+
+
+
diff --git a/BACKEND_MIGRATION.md b/BACKEND_MIGRATION.md
new file mode 100644
index 00000000..354cd7c7
--- /dev/null
+++ b/BACKEND_MIGRATION.md
@@ -0,0 +1,40 @@
+# Overview
+
+In December 2022, ImageAI `3.0.2` was released which effected the change from Tensorflow backend to PyTorch backend. This change allows ImageAI to support `Python 3.7` up to `Python 3.10` for all its features and deprecates a number of functionalities for this and future versions of ImageAI.
+
+
+# Deprecated functionalities
+- Tensorflow backend no longer supported. Now replaced with PyTorch
+- All `.h5` pretrained models and custom trained `.h5` models no longer supported. If you still intend to use these models, see the `Using Tensorflow backend` section.
+- `Speed mode` has been removed from model loading
+- Custom detection model training dataset format changed to YOLO format from Pascal VOC. To convert your dataset to YOLO format, see the `Convert Pascal VOC dataset to YOLO format` section.
+- Enhance data for custom classification model training now removed
+- Detection model training standalone evaluation now removed
+
+# Using Tensorflow backend
+To use Tensorflow backend, do the following
+
+- Install Python 3.7
+- Install Tensorflow
+ - CPU: `pip install tensorflow==2.4.0`
+ - GPU: `pip install tensorflow-gpu==2.4.0`
+- Install other dependencies: `pip install keras==2.4.3 numpy==1.19.3 pillow==7.0.0 scipy==1.4.1 h5py==2.10.0 matplotlib==3.3.2 opencv-python keras-resnet==0.2.0`
+- Install ImageAI **2.1.6**: `pip install imageai==2.1.6`
+- Download the Tensorflow models from the releases below
+ - [Models for Image Recognition and Object Detection](https://github.com/OlafenwaMoses/ImageAI/releases/tag/1.0)
+ - [TF2.x Models [ Exclusives ]](https://github.com/OlafenwaMoses/ImageAI/releases/tag/essentials-v5)
+
+
+
+# Convert Pascal VOC dataset to YOLO format
+Because ImageAI now uses `YOLO format` for training custom object detection models, should you need to train a new model with the new ImageAI version, you will need to convert your `Pascal VOC` datasets to YOLO format by doing the following
+- Run the command below
+ ```
+ python scripts/pascal_voc_to_yolo.py --dataset_dir
+ ```
+- Once completed, you will find the YOLO version of the dataset next to your Pascal VOC dataset.
+ - E.g, if your dataset is in `C:/Users/Troublemaker/Documents/datasets/headset`, your conversion command will be
+ ```
+ python scripts/pascal_voc_to_yolo.py --dataset_dir C:/Users/Troublemaker/Documents/datasets/headset
+ ```
+ and once completed, the output will be in `C:/Users/Troublemaker/Documents/datasets/headset-yolo`
diff --git a/LICENSE b/LICENSE
index c54f6b53..2bd5f3b8 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
MIT License
-Copyright (c) 2018 MOSES OLAFENWA
+Copyright (c) 2019 MOSES OLAFENWA
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..02cc49a4
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
+recursive-include imageai/Detection *.txt
+recursive-include imageai/Classification *.txt
\ No newline at end of file
diff --git a/README.md b/README.md
index 1b9e8441..72f80977 100644
--- a/README.md
+++ b/README.md
@@ -1,356 +1,332 @@
-# ImageAI
-A python library built to empower developers to build applications and systems with self-contained Deep Learning and Computer Vision capabilities using simple
- and few lines of code.
+# ImageAI (v3.0.3)
-
-An AI Commons project https://aicommons.science
-Developed and Maintained by [Moses Olafenwa](https://twitter.com/OlafenwaMoses) and [John Olafenwa](https://twitter.com/johnolafenwa), brothers, creators of [TorchFusion](https://github.com/johnolafenwa/TorchFusion)
-and Authors of [Introduction to Deep Computer Vision](https://john.aicommons.science/deepvision)
-
-Built with simplicity in mind, ImageAI
- supports a list of state-of-the-art Machine Learning algorithms for image prediction, custom image prediction, object detection, video detection, video object tracking
- and image predictions trainings. ImageAI currently supports image prediction and training using 4 different Machine Learning algorithms
- trained on the ImageNet-1000 dataset. ImageAI also supports object detection, video detection and object tracking using RetinaNet, YOLOv3 and TinyYOLOv3 trained on COCO dataset.
- Eventually, ImageAI will provide support for a wider
- and more specialized aspects of Computer Vision including and not limited to image
- recognition in special environments and special fields.
-
-
-
-New Release : ImageAI 2.0.2
- What's new:
-
-- Option to state image size during custom image prediction model trainings
-- Object Detection and Video Object detection now returns bounding box coordinates **('box points')** (x1,y1,x2, y2) for each object detected in addition to object's 'name' and 'percentage probability'
-- Options to hide 'percentage probability' and/or object 'name' from being shown in detected image or video
-- Support for video object detection on video live stream from device camera, connected camera and IP camera
-- Support for **YOLOv3** and **TinyYOLOv3** for all object detection and video object detection tasks.
-- Video object detection for all input types (video file and camera) now allows defining custom functions to execute after each frame, each second and each minute of the video is detected and processed. Also include option to specify custom function at once video is fully detected and processed
-- For each custom function specified, **ImageAI** returns the **frame**/**seconds**/**minute**/**full video analysis** of the detections that include the objects' details ( **name** , **percentage** **probability**, **box_points**), number of instance of each unique object detected (counts) and overall average count of the number of instance of each unique object detected in the case of **second** / **minute** / **full video analysis**
-- Options to return detected frame at every frame, second or minute processed as a **Numpy array**.
-
To use ImageAI in your application developments, you must have installed the following
- dependencies before you install ImageAI :
+[](https://travis-ci.com/OlafenwaMoses/ImageAI) [](https://github.com/OlafenwaMoses/ImageAI/blob/master/LICENSE) [](https://badge.fury.io/py/imageai) [](https://pepy.tech/project/imageai) [](https://pepy.tech/project/imageai)
+
+An open-source python library built to empower developers to build applications and systems with self-contained Deep Learning and Computer Vision capabilities using simple and few lines of code.
+ If you would like to sponsor this project, kindly visit the [GitHub sponsor page](https://github.com/sponsors/OlafenwaMoses).
-
- - Python 3.5.1 (and later versions) Download (Support for Python 2.7 coming soon)
- - pip3 Install
- - Tensorflow 1.4.0 (and later versions) Install or install via pip
pip3 install --upgrade tensorflow
- - Numpy 1.13.1 (and later versions) Install or install via pip
pip3 install numpy
- - SciPy 0.19.1 (and later versions) Install or install via pip
-
- or download the Python Wheel
- imageai-2.0.2-py3-none-any.whl and run the python installation instruction in the command line
- to the path of the file like the one below:
-
-ImageAI provides 4 different algorithms and model types to perform image prediction, trained on the ImageNet-1000 dataset.
-The 4 algorithms provided for image prediction include SqueezeNet, ResNet, InceptionV3 and DenseNet.
-
-Click the link below to see the full sample codes, explanations and best practices guide.
-
-
-
-
-
-
-
-
-
-
-ImageAI provides very convenient and powerful methods
- to perform object detection on images and extract each object from the image. The object detection class provides support for RetinaNet, YOLOv3 and TinyYOLOv3, with options to adjust for state of the art performance or real time processing.
-
-Click the link below to see the full sample codes, explanations and best practices guide.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Video Object Detection and Tracking
-
-
-
Video Object Detection & Analysis
-
Below is a snapshot of a video with objects detected.
-
-
Video Custom Object Detection (Object Tracking)
-
Below is a snapshot of a video with only person, bicycle and motorcyle detected.
-
-
Video Analysis Visualization
-
Below is a visualization of video analysis returned by **ImageAI** into a 'per_second' function.
-
-
-
-ImageAI provides very convenient and powerful methods
- to perform object detection in videos and track specific object(s). The video object detection class provided only supports
- the current state-of-the-art RetinaNet, but with options to adjust for state of the art performance or real time processing.
-Click the link to see the full videos, sample codes, explanations and best practices guide.
-
-
-
-
-
-
-
-
+
+## ---------------------------------------------------
+## Introducing Jarvis and TheiaEngine.
-
-
Custom Model Training
+We, the creators of ImageAI, are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers.
-
-
-
A sample from the IdenProf Dataset used to train a Model for predicting professionals.
-
-
+[](https://jarvis.genxr.co)
-
+Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.
-ImageAI provides classes and methods for you to train a new model that can be used to perform prediction on your own custom objects.
-You can train your custom models using SqueezeNet, ResNet50, InceptionV3 and DenseNet in 5 lines of code.
-Click the link below to see the guide to preparing training images, sample training codes, explanations and best practices.
-
+Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.
-
+[](https://www.genxr.co/theia-engine)
-
+[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
+- **Detect 300+ objects** ( 220 more objects than ImageAI)
+- **Provide answers to any content or context questions** asked on an image
+ - very useful to get details on any object, action or information without needing to train a new custom model for every task
+- **Generate scene description and summary**
+- **Convert 2D image to 3D pointcloud and triangular mesh**
+- **Semantic Scene mapping of objects, walls, floors, etc**
+- **Stateless Face recognition and emotion detection**
+- **Image generation and augmentation from prompt**
+- etc.
-
-
Custom Image Prediction
+Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
+## ---------------------------------------------------
+
+
-
-
Prediction from a sample model trained on IdenProf, for predicting professionals
+Developed and maintained by [Moses Olafenwa](https://twitter.com/OlafenwaMoses)
+---
-
+Built with simplicity in mind, **ImageAI**
+ supports a list of state-of-the-art Machine Learning algorithms for image prediction, custom image prediction, object detection, video detection, video object tracking
+ and image predictions trainings. **ImageAI** currently supports image prediction and training using 4 different Machine Learning algorithms
+ trained on the ImageNet-1000 dataset. **ImageAI** also supports object detection, video detection and object tracking using RetinaNet, YOLOv3 and TinyYOLOv3 trained on COCO dataset. Finally, **ImageAI** allows you to train custom models for performing detection and recognition of new objects.
+
+Eventually, **ImageAI** will provide support for a wider and more specialized aspects of Computer Vision
-ImageAI provides classes and methods for you to run image prediction your own custom objects using your own model trained with ImageAI Model Training class.
-You can use your custom models trained with SqueezeNet, ResNet50, InceptionV3 and DenseNet and the JSON file containing the mapping of the custom object names.
-Click the link below to see the guide to sample training codes, explanations, and best practices guide.
-
+**New Release : ImageAI 3.0.2**
-
+What's new:
+- PyTorch backend
+- TinyYOLOv3 model training
+### TABLE OF CONTENTS
+- :white_square_button: Installation
+- :white_square_button: Features
+- :white_square_button: Documentation
+- :white_square_button: Sponsors
+- :white_square_button: Projects Built on ImageAI
+- :white_square_button: High Performance Implementation
+- :white_square_button: AI Practice Recommendations
+- :white_square_button: Contact Developers
+- :white_square_button: Citation
+- :white_square_button: References
+## Installation
+
+
+To install ImageAI, run the python installation instruction below in the command line:
+
+- [Download and Install](https://www.python.org/downloads/) **Python 3.7**, **Python 3.8**, **Python 3.9** or **Python 3.10**
+- Install dependencies
+ - **CPU**: Download [requirements.txt](https://github.com/OlafenwaMoses/ImageAI/blob/master/requirements.txt) file and install via the command
+ ```
+ pip install -r requirements.txt
+ ```
+ or simply copy and run the command below
+
+ ```
+ pip install cython pillow>=7.0.0 numpy>=1.18.1 opencv-python>=4.1.2 torch>=1.9.0 --extra-index-url https://download.pytorch.org/whl/cpu torchvision>=0.10.0 --extra-index-url https://download.pytorch.org/whl/cpu pytest==7.1.3 tqdm==4.64.1 scipy>=1.7.3 matplotlib>=3.4.3 mock==4.0.3
+ ```
+
+ - **GPU/CUDA**: Download [requirements_gpu.txt](https://github.com/OlafenwaMoses/ImageAI/blob/master/requirements_gpu.txt) file and install via the command
+ ```
+ pip install -r requirements_gpu.txt
+ ```
+    or simply copy and run the command below
+ ```
+ pip install cython pillow>=7.0.0 numpy>=1.18.1 opencv-python>=4.1.2 torch>=1.9.0 --extra-index-url https://download.pytorch.org/whl/cu102 torchvision>=0.10.0 --extra-index-url https://download.pytorch.org/whl/cu102 pytest==7.1.3 tqdm==4.64.1 scipy>=1.7.3 matplotlib>=3.4.3 mock==4.0.3
+ ```
+- If you plan to train custom AI models, download [requirements_extra.txt](https://github.com/OlafenwaMoses/ImageAI/blob/master/requirements_extra.txt) file and install via the command
+
+ ```
+ pip install -r requirements_extra.txt
+ ```
+ or simply copy and run the command below
+ ```
+ pip install pycocotools@git+https://github.com/gautamchitnis/cocoapi.git@cocodataset-master#subdirectory=PythonAPI
+ ```
+- Then run the command below to install ImageAI
+ ```
+ pip install imageai --upgrade
+ ```
+
+## Features
+
+
+
+
Image Classification
+
+
+
+
ImageAI provides 4 different algorithms and model types to perform image prediction, trained on the ImageNet-1000 dataset. The 4 algorithms provided for image prediction include MobileNetV2, ResNet50, InceptionV3 and DenseNet121.
+ Click the link below to see the full sample codes, explanations and best practices guide.
ImageAI provides very convenient and powerful methods to perform object detection on images and extract each object from the image. The object detection class provides support for RetinaNet, YOLOv3 and TinyYOLOv3, with options to adjust for state of the art performance or real time processing. Click the link below to see the full sample codes, explanations and best practices guide.
ImageAI provides very convenient and powerful methods to perform object detection in videos. The video object detection class provided only supports the current state-of-the-art RetinaNet. Click the link to see the full videos, sample codes, explanations and best practices guide.
ImageAI provides classes and methods for you to train a new model that can be used to perform prediction on your own custom objects. You can train your custom models using MobileNetV2, ResNet50, InceptionV3 and DenseNet in 5 lines of code. Click the link below to see the guide to preparing training images, sample training codes, explanations and best practices.
ImageAI provides classes and methods for you to run image prediction your own custom objects using your own model trained with ImageAI Model Training class. You can use your custom models trained with MobileNetV2, ResNet50, InceptionV3 and DenseNet and the JSON file containing the mapping of the custom object names. Click the link below to see the guide to sample training codes, explanations, and best practices guide.
ImageAI provides classes and methods for you to train new YOLOv3 or TinyYOLOv3 object detection models on your custom dataset. This means you can train a model to detect literally any object of interest by providing the images, the annotations and training with ImageAI. Click the link below to see the guide to sample training codes, explanations, and best practices guide.
ImageAI now provides classes and methods for you detect and recognize your own custom objects in images using your own model trained with the DetectionModelTrainer class. You can use your custom trained YOLOv3 or TinyYOLOv3 model and the **.json** file generated during the training. Click the link below to see the guide to sample training codes, explanations, and best practices guide.
ImageAI now provides classes and methods for you detect and recognize your own custom objects in images using your own model trained with the DetectionModelTrainer class. You can use your custom trained YOLOv3 or TinyYOLOv3 model and the **.json** file generated during the training. Click the link below to see the guide to sample training codes, explanations, and best practices guide.
+
+## Documentation
+
+We have provided full documentation for all **ImageAI** classes and functions. Visit the link below:
-
-
-
Documentation
+- Documentation - **English Version** [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
-We have provided full documentation for all ImageAI classes and functions in 2 major languages. Find links below:
- >> Documentation - English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
- >> Documentation - Chinese Version [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)
+## Sponsors
+
-
+## Real-Time and High Performance Implementation
-
Real-Time and High Performance Implementation
-ImageAI provides abstracted and convenient implementations of state-of-the-art Computer Vision technologies. All of ImageAI implementations and code can work on any computer system with moderate CPU capacity. However, the speed of processing for operations like image prediction, object detection and others on CPU is slow and not suitable for real-time applications. To perform real-time Computer Vision operations with high performance, you need to use GPU enabled technologies.
-
-ImageAI uses the Tensorflow backbone for it's Computer Vision operations. Tensorflow supports both CPUs and GPUs ( Specifically NVIDIA GPUs. You can get one for your PC or get a PC that has one) for machine learning and artificial intelligence algorithms' implementations. To use Tensorflow that supports the use of GPUs, follow the link below :
-
+**ImageAI** provides abstracted and convenient implementations of state-of-the-art Computer Vision technologies. All of **ImageAI**'s implementations and code can work on any computer system with moderate CPU capacity. However, the speed of processing for operations like image prediction, object detection and others on CPU is slow and not suitable for real-time applications. To perform real-time Computer Vision operations with high performance, you need to use GPU enabled technologies.
-FOR macOS
-https://www.tensorflow.org/install/install_mac
+**ImageAI** uses the PyTorch backbone for its Computer Vision operations. PyTorch supports both CPUs and GPUs ( Specifically NVIDIA GPUs. You can get one for your PC or get a PC that has one) for machine learning and artificial intelligence algorithms' implementations.
-FOR UBUNTU
-https://www.tensorflow.org/install/install_linux
-
+## Projects Built on ImageAI
-
Sample Applications
As a demonstration of what you can do with ImageAI, we have
- built a complete AI powered Photo gallery for Windows called IntelliP , using ImageAI and UI framework Kivy. Follow this
- link to download page of the application and its source code.
- We also welcome submissions of applications and systems built by you and powered by ImageAI for listings here. Should you want your ImageAI powered
- developments listed here, you can reach to us via our Contacts below.
-
+## AI Practice Recommendations
-
AI Practice Recommendations
- For anyone interested in building AI systems and using them for business, economic, social and research purposes, it is critical that the person knows the likely positive, negative and unprecedented impacts the use of such technologies will have. They must also be aware of approaches and practices recommended by experienced industry experts to ensure every use of AI brings overall benefit to mankind. We therefore recommend to everyone that wishes to use ImageAI and other AI tools and resources to read Microsoft's January 2018 publication on AI titled "The Future Computed : Artificial Intelligence and its role in society ".
+For anyone interested in building AI systems and using them for business, economic, social and research purposes, it is critical that the person knows the likely positive, negative and unprecedented impacts the use of such technologies will have.
+They must also be aware of approaches and practices recommended by experienced industry experts to ensure every use of AI brings overall benefit to mankind.
+We therefore recommend to everyone that wishes to use ImageAI and other AI tools and resources to read Microsoft's January 2018 publication on AI titled "The Future Computed : Artificial Intelligence and its role in society".
Kindly follow the link below to download the publication.
-
-We are inviting anyone who wishes to contribute to the ImageAI project to reach to us. We primarily need contributions in translating the documentation of the project's code to major languages that includes but not limited to French, Spanish, Portuguese, Arabian and more. We want every developer and researcher around the world to benefit from this project irrespective of their native languages.
-
-We give special thanks to [Kang vcar](https://github.com/kangvcar/) for his incredible and excellent work in translating ImageAI's documentation to the Chinese language. Find below the contact details of those who have contributed immensely to the ImageAI project.
-
-
-
-
+- **Moses Olafenwa**
+ * _Email:_ guymodscientist@gmail.com
+ * _Twitter:_ [@OlafenwaMoses](https://twitter.com/OlafenwaMoses)
+ * _Medium:_ [@guymodscientist](https://medium.com/@guymodscientist)
+ * _Facebook:_ [moses.olafenwa](https://facebook.com/moses.olafenwa)
+- **John Olafenwa**
+ * _Email:_ johnolafenwa@gmail.com
+ * _Website:_ [https://john.aicommons.science](https://john.aicommons.science)
+ * _Twitter:_ [@johnolafenwa](https://twitter.com/johnolafenwa)
+ * _Medium:_ [@johnolafenwa](https://medium.com/@johnolafenwa)
+ * _Facebook:_ [olafenwajohn](https://facebook.com/olafenwajohn)
+
+
+### Citation
+
+
+You can cite **ImageAI** in your projects and research papers via the **BibTeX** entry below.
+
+```
+@misc {ImageAI,
+ author = "Moses",
+ title = "ImageAI, an open source python library built to empower developers to build applications and systems with self-contained Computer Vision capabilities",
+ url = "https://github.com/OlafenwaMoses/ImageAI",
+ month = "mar",
+ year = "2018--"
+}
+```
+
+
+
+ ### References
-
References
-
- 1. Somshubra Majumdar, DenseNet Implementation of the paper, Densely Connected Convolutional Networks in Keras
- https://github.com/titu1994/DenseNet/
-
-
-
+ 1. Somshubra Majumdar, DenseNet Implementation of the paper, Densely Connected Convolutional Networks in Keras
+[https://github.com/titu1994/DenseNet](https://github.com/titu1994/DenseNet)
+ 2. Broad Institute of MIT and Harvard, Keras package for deep residual networks
+[https://github.com/broadinstitute/keras-resnet](https://github.com/broadinstitute/keras-resnet)
+ 3. Fizyr, Keras implementation of RetinaNet object detection
+[https://github.com/fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet)
+ 4. Francois Chollet, Keras code and weights files for popular deeplearning models
+[https://github.com/fchollet/deep-learning-models](https://github.com/fchollet/deep-learning-models)
+ 5. Forrest N. et al, SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size
+[https://arxiv.org/abs/1602.07360](https://arxiv.org/abs/1602.07360)
+ 6. Kaiming H. et al, Deep Residual Learning for Image Recognition
+[https://arxiv.org/abs/1512.03385](https://arxiv.org/abs/1512.03385)
+ 7. Szegedy. et al, Rethinking the Inception Architecture for Computer Vision
+[https://arxiv.org/abs/1512.00567](https://arxiv.org/abs/1512.00567)
+ 8. Gao. et al, Densely Connected Convolutional Networks
+[https://arxiv.org/abs/1608.06993](https://arxiv.org/abs/1608.06993)
+ 9. Tsung-Yi. et al, Focal Loss for Dense Object Detection
+[https://arxiv.org/abs/1708.02002](https://arxiv.org/abs/1708.02002)
+ 10. O Russakovsky et al, ImageNet Large Scale Visual Recognition Challenge
+[https://arxiv.org/abs/1409.0575](https://arxiv.org/abs/1409.0575)
+ 11. TY Lin et al, Microsoft COCO: Common Objects in Context
+[https://arxiv.org/abs/1405.0312](https://arxiv.org/abs/1405.0312)
+ 12. Moses & John Olafenwa, A collection of images of identifiable professionals.
+[https://github.com/OlafenwaMoses/IdenProf](https://github.com/OlafenwaMoses/IdenProf)
+ 13. Joseph Redmon and Ali Farhadi, YOLOv3: An Incremental Improvement.
+[https://arxiv.org/abs/1804.02767](https://arxiv.org/abs/1804.02767)
+ 14. Experiencor, Training and Detecting Objects with YOLO3
+[https://github.com/experiencor/keras-yolo3](https://github.com/experiencor/keras-yolo3)
+ 15. MobileNetV2: Inverted Residuals and Linear Bottlenecks
+[https://arxiv.org/abs/1801.04381](https://arxiv.org/abs/1801.04381)
+ 16. YOLOv3 in PyTorch > ONNX > CoreML > TFLite [https://github.com/ultralytics/yolov3](https://github.com/ultralytics/yolov3)
diff --git a/Thumbs.db b/Thumbs.db
deleted file mode 100644
index 3c985a06..00000000
Binary files a/Thumbs.db and /dev/null differ
diff --git a/images/1.jpg b/data-images/1.jpg
similarity index 100%
rename from images/1.jpg
rename to data-images/1.jpg
diff --git a/images/2.jpg b/data-images/2.jpg
similarity index 100%
rename from images/2.jpg
rename to data-images/2.jpg
diff --git a/images/3.jpg b/data-images/3.jpg
similarity index 100%
rename from images/3.jpg
rename to data-images/3.jpg
diff --git a/images/4.jpg b/data-images/4.jpg
similarity index 100%
rename from images/4.jpg
rename to data-images/4.jpg
diff --git a/images/5.jpg b/data-images/5.jpg
similarity index 100%
rename from images/5.jpg
rename to data-images/5.jpg
diff --git a/images/5fast.jpg b/data-images/5fast.jpg
similarity index 100%
rename from images/5fast.jpg
rename to data-images/5fast.jpg
diff --git a/images/5faster.jpg b/data-images/5faster.jpg
similarity index 100%
rename from images/5faster.jpg
rename to data-images/5faster.jpg
diff --git a/images/5fastest.jpg b/data-images/5fastest.jpg
similarity index 100%
rename from images/5fastest.jpg
rename to data-images/5fastest.jpg
diff --git a/images/5flash.jpg b/data-images/5flash.jpg
similarity index 100%
rename from images/5flash.jpg
rename to data-images/5flash.jpg
diff --git a/images/5normal.jpg b/data-images/5normal.jpg
similarity index 100%
rename from images/5normal.jpg
rename to data-images/5normal.jpg
diff --git a/images/6flash.jpg b/data-images/6flash.jpg
similarity index 100%
rename from images/6flash.jpg
rename to data-images/6flash.jpg
diff --git a/data-images/customvideodetection.gif b/data-images/customvideodetection.gif
new file mode 100644
index 00000000..6579ba4a
Binary files /dev/null and b/data-images/customvideodetection.gif differ
diff --git a/data-images/customvideodetection.jpg b/data-images/customvideodetection.jpg
new file mode 100644
index 00000000..d5c28ded
Binary files /dev/null and b/data-images/customvideodetection.jpg differ
diff --git a/data-images/headsets.jpg b/data-images/headsets.jpg
new file mode 100644
index 00000000..3fde9bd9
Binary files /dev/null and b/data-images/headsets.jpg differ
diff --git a/data-images/holo-video-detected.jpg b/data-images/holo-video-detected.jpg
new file mode 100644
index 00000000..65d64473
Binary files /dev/null and b/data-images/holo-video-detected.jpg differ
diff --git a/data-images/holo-video.jpg b/data-images/holo-video.jpg
new file mode 100644
index 00000000..6d8e6e86
Binary files /dev/null and b/data-images/holo-video.jpg differ
diff --git a/data-images/holo1.jpg b/data-images/holo1.jpg
new file mode 100644
index 00000000..781b73e8
Binary files /dev/null and b/data-images/holo1.jpg differ
diff --git a/data-images/holo2-detected-objects/hololens-1.jpg b/data-images/holo2-detected-objects/hololens-1.jpg
new file mode 100644
index 00000000..b8ddcfab
Binary files /dev/null and b/data-images/holo2-detected-objects/hololens-1.jpg differ
diff --git a/data-images/holo2-detected-objects/hololens-2.jpg b/data-images/holo2-detected-objects/hololens-2.jpg
new file mode 100644
index 00000000..d864644a
Binary files /dev/null and b/data-images/holo2-detected-objects/hololens-2.jpg differ
diff --git a/data-images/holo2-detected-objects/hololens-3.jpg b/data-images/holo2-detected-objects/hololens-3.jpg
new file mode 100644
index 00000000..a5db86f3
Binary files /dev/null and b/data-images/holo2-detected-objects/hololens-3.jpg differ
diff --git a/data-images/holo2-detected-objects/hololens-4.jpg b/data-images/holo2-detected-objects/hololens-4.jpg
new file mode 100644
index 00000000..1f82f7a4
Binary files /dev/null and b/data-images/holo2-detected-objects/hololens-4.jpg differ
diff --git a/data-images/holo2-detected-objects/hololens-5.jpg b/data-images/holo2-detected-objects/hololens-5.jpg
new file mode 100644
index 00000000..edc5b4ec
Binary files /dev/null and b/data-images/holo2-detected-objects/hololens-5.jpg differ
diff --git a/data-images/holo2-detected-objects/hololens-6.jpg b/data-images/holo2-detected-objects/hololens-6.jpg
new file mode 100644
index 00000000..9aaacf21
Binary files /dev/null and b/data-images/holo2-detected-objects/hololens-6.jpg differ
diff --git a/data-images/holo2-detected-objects/hololens-7.jpg b/data-images/holo2-detected-objects/hololens-7.jpg
new file mode 100644
index 00000000..1aa5503e
Binary files /dev/null and b/data-images/holo2-detected-objects/hololens-7.jpg differ
diff --git a/data-images/holo2-detected.jpg b/data-images/holo2-detected.jpg
new file mode 100644
index 00000000..9eebf56a
Binary files /dev/null and b/data-images/holo2-detected.jpg differ
diff --git a/data-images/holo2-nodetails.jpg b/data-images/holo2-nodetails.jpg
new file mode 100644
index 00000000..ae936b5c
Binary files /dev/null and b/data-images/holo2-nodetails.jpg differ
diff --git a/data-images/holo2.jpg b/data-images/holo2.jpg
new file mode 100644
index 00000000..6d834a13
Binary files /dev/null and b/data-images/holo2.jpg differ
diff --git a/images/idenprof.jpg b/data-images/idenprof.jpg
similarity index 100%
rename from images/idenprof.jpg
rename to data-images/idenprof.jpg
diff --git a/images/image2.jpg b/data-images/image2.jpg
similarity index 100%
rename from images/image2.jpg
rename to data-images/image2.jpg
diff --git a/images/image2new.jpg b/data-images/image2detected.jpg
similarity index 100%
rename from images/image2new.jpg
rename to data-images/image2detected.jpg
diff --git a/data-images/image2new.jpg b/data-images/image2new.jpg
new file mode 100644
index 00000000..408af499
Binary files /dev/null and b/data-images/image2new.jpg differ
diff --git a/images/image3.jpg b/data-images/image3.jpg
similarity index 100%
rename from images/image3.jpg
rename to data-images/image3.jpg
diff --git a/images/image3custom.jpg b/data-images/image3custom.jpg
similarity index 100%
rename from images/image3custom.jpg
rename to data-images/image3custom.jpg
diff --git a/images/image3new.jpg-objects/bicycle-5.jpg b/data-images/image3new-objects/bicycle-5.jpg
similarity index 100%
rename from images/image3new.jpg-objects/bicycle-5.jpg
rename to data-images/image3new-objects/bicycle-5.jpg
diff --git a/images/image3new.jpg-objects/car-4.jpg b/data-images/image3new-objects/car-4.jpg
similarity index 100%
rename from images/image3new.jpg-objects/car-4.jpg
rename to data-images/image3new-objects/car-4.jpg
diff --git a/images/image3new.jpg-objects/cat-2.jpg b/data-images/image3new-objects/cat-2.jpg
similarity index 100%
rename from images/image3new.jpg-objects/cat-2.jpg
rename to data-images/image3new-objects/cat-2.jpg
diff --git a/images/image3new.jpg-objects/dog-1.jpg b/data-images/image3new-objects/dog-1.jpg
similarity index 100%
rename from images/image3new.jpg-objects/dog-1.jpg
rename to data-images/image3new-objects/dog-1.jpg
diff --git a/images/image3new.jpg-objects/motorcycle-3.jpg b/data-images/image3new-objects/motorcycle-3.jpg
similarity index 100%
rename from images/image3new.jpg-objects/motorcycle-3.jpg
rename to data-images/image3new-objects/motorcycle-3.jpg
diff --git a/images/image3new.jpg-objects/person-10.jpg b/data-images/image3new-objects/person-10.jpg
similarity index 100%
rename from images/image3new.jpg-objects/person-10.jpg
rename to data-images/image3new-objects/person-10.jpg
diff --git a/images/image3new.jpg-objects/person-6.jpg b/data-images/image3new-objects/person-6.jpg
similarity index 100%
rename from images/image3new.jpg-objects/person-6.jpg
rename to data-images/image3new-objects/person-6.jpg
diff --git a/images/image3new.jpg-objects/person-7.jpg b/data-images/image3new-objects/person-7.jpg
similarity index 100%
rename from images/image3new.jpg-objects/person-7.jpg
rename to data-images/image3new-objects/person-7.jpg
diff --git a/images/image3new.jpg-objects/person-8.jpg b/data-images/image3new-objects/person-8.jpg
similarity index 100%
rename from images/image3new.jpg-objects/person-8.jpg
rename to data-images/image3new-objects/person-8.jpg
diff --git a/images/image3new.jpg-objects/person-9.jpg b/data-images/image3new-objects/person-9.jpg
similarity index 100%
rename from images/image3new.jpg-objects/person-9.jpg
rename to data-images/image3new-objects/person-9.jpg
diff --git a/images/image3new.jpg b/data-images/image3new.jpg
similarity index 100%
rename from images/image3new.jpg
rename to data-images/image3new.jpg
diff --git a/images/nodetails.jpg b/data-images/nodetails.jpg
similarity index 100%
rename from images/nodetails.jpg
rename to data-images/nodetails.jpg
diff --git a/images/video--1.jpg b/data-images/video--1.jpg
similarity index 100%
rename from images/video--1.jpg
rename to data-images/video--1.jpg
diff --git a/images/video-1.jpg b/data-images/video-1.jpg
similarity index 100%
rename from images/video-1.jpg
rename to data-images/video-1.jpg
diff --git a/images/video-2.jpg b/data-images/video-2.jpg
similarity index 100%
rename from images/video-2.jpg
rename to data-images/video-2.jpg
diff --git a/images/video-3.jpg b/data-images/video-3.jpg
similarity index 100%
rename from images/video-3.jpg
rename to data-images/video-3.jpg
diff --git a/images/video-4.jpg b/data-images/video-4.jpg
similarity index 100%
rename from images/video-4.jpg
rename to data-images/video-4.jpg
diff --git a/images/video1.jpg b/data-images/video1.jpg
similarity index 100%
rename from images/video1.jpg
rename to data-images/video1.jpg
diff --git a/images/video2.jpg b/data-images/video2.jpg
similarity index 100%
rename from images/video2.jpg
rename to data-images/video2.jpg
diff --git a/images/video_analysis_visualization.jpg b/data-images/video_analysis_visualization.jpg
similarity index 100%
rename from images/video_analysis_visualization.jpg
rename to data-images/video_analysis_visualization.jpg
diff --git a/images/yolo.jpg b/data-images/yolo.jpg
similarity index 100%
rename from images/yolo.jpg
rename to data-images/yolo.jpg
diff --git a/data-videos/holo1.mp4 b/data-videos/holo1.mp4
new file mode 100644
index 00000000..28a9c12c
Binary files /dev/null and b/data-videos/holo1.mp4 differ
diff --git a/videos/traffic-mini.mp4 b/data-videos/traffic-mini.mp4
similarity index 100%
rename from videos/traffic-mini.mp4
rename to data-videos/traffic-mini.mp4
diff --git a/videos/traffic.mp4 b/data-videos/traffic.mp4
similarity index 100%
rename from videos/traffic.mp4
rename to data-videos/traffic.mp4
diff --git a/dist/imageai-2.0.2-py3-none-any.whl b/dist/imageai-2.0.2-py3-none-any.whl
deleted file mode 100644
index 12ea09c4..00000000
Binary files a/dist/imageai-2.0.2-py3-none-any.whl and /dev/null differ
diff --git a/examples/camera_feed_detection.py b/examples/camera_feed_detection.py
index e1fd7171..36720d0d 100644
--- a/examples/camera_feed_detection.py
+++ b/examples/camera_feed_detection.py
@@ -8,7 +8,7 @@
detector = VideoObjectDetection()
detector.setModelTypeAsYOLOv3()
-detector.setModelPath(os.path.join(execution_path , "yolo.h5"))
+detector.setModelPath(os.path.join(execution_path , "yolov3.pt")) # Download the model via this link https://github.com/OlafenwaMoses/ImageAI/releases/tag/1.0
detector.loadModel()
video_path = detector.detectObjectsFromVideo(camera_input=camera,
diff --git a/examples/custom_detection.py b/examples/custom_detection.py
new file mode 100644
index 00000000..fc8c2cd8
--- /dev/null
+++ b/examples/custom_detection.py
@@ -0,0 +1,22 @@
+from imageai.Detection.Custom import CustomObjectDetection
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
+detector.setJsonPath("hololens-yolo_yolov3_detection_config.json") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
+detector.loadModel()
+detections = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
+for detection in detections:
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+
+
+"""
+EXAMPLE RESULT
+
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_array_input_output.py b/examples/custom_detection_array_input_output.py
new file mode 100644
index 00000000..baca3166
--- /dev/null
+++ b/examples/custom_detection_array_input_output.py
@@ -0,0 +1,29 @@
+from imageai.Detection.Custom import CustomObjectDetection
+import cv2
+
+image_array = cv2.imread("holo2.jpg")
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
+detector.setJsonPath("hololens-yolo_yolov3_detection_config.json") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
+detector.loadModel()
+detected_image, detections = detector.detectObjectsFromImage(input_image=image_array, input_type="array", output_type="array")
+
+for eachObject in detections:
+ print(eachObject["name"], " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"])
+
+cv2.imshow("Main Image", detected_image)
+cv2.waitKey()
+cv2.destroyAllWindows()
+
+
+"""
+SAMPLE RESULT
+
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_extract_objects.py b/examples/custom_detection_extract_objects.py
new file mode 100644
index 00000000..566bf160
--- /dev/null
+++ b/examples/custom_detection_extract_objects.py
@@ -0,0 +1,37 @@
+from imageai.Detection.Custom import CustomObjectDetection
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
+detector.setJsonPath("hololens-yolo_yolov3_detection_config.json") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
+detector.loadModel()
+detections, extracted_objects_array = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg", extract_detected_objects=True)
+
+for detection, object_path in zip(detections, extracted_objects_array):
+ print(object_path)
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+ print("---------------")
+
+"""
+SAMPLE RESULT
+
+holo2-detected-objects\hololens-1.jpg
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+---------------
+
+holo2-detected-objects\hololens-1.jpg
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+---------------
+
+holo2-detected-objects\hololens-1.jpg
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+---------------
+
+holo2-detected-objects\hololens-1.jpg
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+---------------
+
+holo2-detected-objects\hololens-1.jpg
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+---------------
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_from_array_extract_objects_array.py b/examples/custom_detection_from_array_extract_objects_array.py
new file mode 100644
index 00000000..12dbd733
--- /dev/null
+++ b/examples/custom_detection_from_array_extract_objects_array.py
@@ -0,0 +1,37 @@
+from imageai.Detection.Custom import CustomObjectDetection
+import cv2
+
+image_array = cv2.imread("holo2.jpg")
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
+detector.setJsonPath("hololens-yolo_yolov3_detection_config.json") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
+detector.loadModel()
+detected_image, detections, extracted_objects = detector.detectObjectsFromImage(input_image=image_array, extract_detected_objects=True, input_type="array", output_type="array")
+
+
+for eachObject in detections:
+ print(eachObject["name"], " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"])
+
+
+cv2.imshow("Main Image", detected_image)
+count = 0
+for img in extracted_objects:
+ count += 1
+
+ cv2.imshow("Window" + str(count), img)
+
+cv2.waitKey()
+cv2.destroyAllWindows()
+
+
+"""
+SAMPLE RESULT
+
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_from_file_extract_objects_array.py b/examples/custom_detection_from_file_extract_objects_array.py
new file mode 100644
index 00000000..00124058
--- /dev/null
+++ b/examples/custom_detection_from_file_extract_objects_array.py
@@ -0,0 +1,36 @@
+from imageai.Detection.Custom import CustomObjectDetection
+import cv2
+
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
+detector.setJsonPath("hololens-yolo_yolov3_detection_config.json") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
+detector.loadModel()
+detected_image, detections, extracted_objects = detector.detectObjectsFromImage(input_image="holo2.jpg", extract_detected_objects=True, output_type="array")
+
+
+for eachObject in detections:
+ print(eachObject["name"], " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"])
+
+cv2.imshow("Main Image", detected_image)
+count = 0
+for img in extracted_objects:
+ count += 1
+
+ cv2.imshow("Window" + str(count), img)
+
+cv2.waitKey()
+cv2.destroyAllWindows()
+
+
+"""
+SAMPLE RESULT
+
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_train.py b/examples/custom_detection_train.py
new file mode 100644
index 00000000..29b67244
--- /dev/null
+++ b/examples/custom_detection_train.py
@@ -0,0 +1,40 @@
+from imageai.Detection.Custom import DetectionModelTrainer
+
+trainer = DetectionModelTrainer()
+trainer.setModelTypeAsYOLOv3()
+trainer.setDataDirectory(data_directory="hololens")
+trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="yolov3.pt")
+#download pre-trained model via https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
+# If you are training to detect more than 1 object, set names of objects above like object_names_array=["hololens", "google-glass", "oculus", "magic-leap"]
+trainer.trainModel()
+
+
+
+"""
+SAMPLE RESULT
+
+Generating anchor boxes for training images...
+thr=0.25: 1.0000 best possible recall, 6.93 anchors past thr
+n=9, img_size=416, metric_all=0.463/0.856-mean/best, past_thr=0.549-mean:
+====================
+Pretrained YOLOv3 model loaded to initialize weights
+====================
+Epoch 1/100
+----------
+Train:
+30it [00:14, 2.09it/s]
+ box loss-> 0.09820, object loss-> 0.27985, class loss-> 0.00000
+Validation:
+15it [01:45, 7.05s/it]
+ recall: 0.085714 precision: 0.000364 mAP@0.5: 0.000186, mAP@0.5-0.95: 0.000030
+
+Epoch 2/100
+----------
+Train:
+30it [00:07, 4.25it/s]
+ box loss-> 0.08691, object loss-> 0.07011, class loss-> 0.00000
+Validation:
+15it [01:37, 6.53s/it]
+ recall: 0.214286 precision: 0.000854 mAP@0.5: 0.000516, mAP@0.5-0.95: 0.000111
+"""
+
diff --git a/examples/custom_detection_video.py b/examples/custom_detection_video.py
new file mode 100644
index 00000000..3e24727c
--- /dev/null
+++ b/examples/custom_detection_video.py
@@ -0,0 +1,16 @@
+from imageai.Detection.Custom import CustomVideoObjectDetection
+import os
+
+execution_path = os.getcwd()
+
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
+video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json") # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(input_file_path="holo1.mp4",
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20,
+ minimum_percentage_probability=40,
+ log_progress=True)
\ No newline at end of file
diff --git a/examples/custom_model_prediction.py b/examples/custom_model_prediction.py
index a5cb965f..900c0fb3 100644
--- a/examples/custom_model_prediction.py
+++ b/examples/custom_model_prediction.py
@@ -1,15 +1,15 @@
-from imageai.Prediction.Custom import CustomImagePrediction
+from imageai.Classification.Custom import CustomImageClassification
import os
execution_path = os.getcwd()
-prediction = CustomImagePrediction()
-prediction.setModelTypeAsResNet()
-prediction.setModelPath(os.path.join(execution_path, "resnet_model_ex-020_acc-0.651714.h5"))
-prediction.setJsonPath(os.path.join(execution_path, "model_class.json"))
+prediction = CustomImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath(os.path.join(execution_path, "resnet50-idenprof-test_acc_0.78200_epoch-91.pt")) # Download the model via this link https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-idenprof-test_acc_0.78200_epoch-91.pt
+prediction.setJsonPath(os.path.join(execution_path, "idenprof_model_classes.json")) # Download from here https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof_model_classes.json
prediction.loadModel(num_objects=10)
-predictions, probabilities = prediction.predictImage(os.path.join(execution_path, "4.jpg"), result_count=5)
+predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "9.jpg"), result_count=5)
for eachPrediction, eachProbability in zip(predictions, probabilities):
print(eachPrediction , " : " , eachProbability)
\ No newline at end of file
diff --git a/examples/custom_model_training.py b/examples/custom_model_training.py
index 72d33395..13747e9b 100644
--- a/examples/custom_model_training.py
+++ b/examples/custom_model_training.py
@@ -1,7 +1,7 @@
-from imageai.Prediction.Custom import ModelTraining
+from imageai.Classification.Custom import ClassificationModelTrainer
-model_trainer = ModelTraining()
-model_trainer.setModelTypeAsResNet()
-model_trainer.setDataDirectory(r"C:/Users/Moses/Documents/Moses/W7/AI/Custom Datasets/idenprof")
-model_trainer.trainModel(num_objects=10, num_experiments=20, enhance_data=True, batch_size=32, show_network_summary=True)
+model_trainer = ClassificationModelTrainer()
+model_trainer.setModelTypeAsResNet50()
+model_trainer.setDataDirectory("idenprof")
+model_trainer.trainModel(num_experiments=200, batch_size=32)
diff --git a/examples/image_custom_object_detection.py b/examples/image_custom_object_detection.py
index 8b1a7511..fa6628fc 100644
--- a/examples/image_custom_object_detection.py
+++ b/examples/image_custom_object_detection.py
@@ -6,15 +6,14 @@
detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
-detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.setModelPath( os.path.join(execution_path , "yolov3.pt")) # Download the model via this link https://github.com/OlafenwaMoses/ImageAI/releases/tag/1.0
detector.loadModel()
our_time = time()
-custom = detector.CustomObjects(person=True, dog=True)
+custom = detector.CustomObjects(bicycle=True, backpack=True)
-detections = detector.detectCustomObjectsFromImage( custom_objects=custom, input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new-custom.jpg"), minimum_percentage_probability=30)
-print("IT TOOK : ", time() - our_time)
+detections = detector.detectCustomObjectsFromImage( custom_objects=custom, input_image=os.path.join(execution_path , "7.jpg"), output_image_path=os.path.join(execution_path , "7-detected.jpg"), minimum_percentage_probability=40)
for eachObject in detections:
print(eachObject["name"] , " : " , eachObject["percentage_probability"], " : ", eachObject["box_points"] )
print("--------------------------------")
diff --git a/examples/image_object_detection.py b/examples/image_object_detection.py
deleted file mode 100644
index fb6cb0c8..00000000
--- a/examples/image_object_detection.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from imageai.Detection import ObjectDetection
-import os
-from time import time
-
-execution_path = os.getcwd()
-
-detector = ObjectDetection()
-detector.setModelTypeAsYOLOv3()
-detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
-detector.loadModel()
-
-our_time = time()
-detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new.jpg"), minimum_percentage_probability=30)
-print("IT TOOK : ", time() - our_time)
-for eachObject in detections:
- print(eachObject["name"] , " : " , eachObject["percentage_probability"], " : ", eachObject["box_points"] )
- print("--------------------------------")
diff --git a/examples/image_prediction.py b/examples/image_prediction.py
index a8ccfacb..271022c0 100644
--- a/examples/image_prediction.py
+++ b/examples/image_prediction.py
@@ -1,13 +1,13 @@
-from imageai.Prediction import ImagePrediction
+from imageai.Classification import ImageClassification
import os
execution_path = os.getcwd()
-prediction = ImagePrediction()
-prediction.setModelTypeAsResNet()
-prediction.setModelPath(os.path.join(execution_path, "resnet50_weights_tf_dim_ordering_tf_kernels.h5"))
+prediction = ImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath(os.path.join(execution_path, "resnet50-19c8e357.pth")) # Download the model via this link https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-19c8e357.pth
prediction.loadModel()
-predictions, probabilities = prediction.predictImage(os.path.join(execution_path, "image1.jpg"), result_count=10)
+predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "1.jpg"), result_count=10)
for eachPrediction, eachProbability in zip(predictions, probabilities):
print(eachPrediction , " : " , eachProbability)
\ No newline at end of file
diff --git a/examples/multiple_image_prediction.py b/examples/multiple_image_prediction.py
deleted file mode 100644
index 31b88c54..00000000
--- a/examples/multiple_image_prediction.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from imageai.Prediction import ImagePrediction
-import os
-
-execution_path = os.getcwd()
-
-multiple_prediction = ImagePrediction()
-multiple_prediction.setModelTypeAsResNet()
-multiple_prediction.setModelPath(os.path.join(execution_path, "resnet50_weights_tf_dim_ordering_tf_kernels.h5"))
-multiple_prediction.loadModel()
-
-all_images_array = []
-
-all_files = os.listdir(execution_path)
-for each_file in all_files:
- if(each_file.endswith(".jpg") or each_file.endswith(".png")):
- all_images_array.append(each_file)
-
-results_array = multiple_prediction.predictMultipleImages(all_images_array, result_count_per_image=5)
-
-for each_result in results_array:
- predictions, percentage_probabilities = each_result["predictions"], each_result["percentage_probabilities"]
- for index in range(len(predictions)):
- print(predictions[index] , " : " , percentage_probabilities[index])
- print("-----------------------")
\ No newline at end of file
diff --git a/examples/object_detection.py b/examples/object_detection.py
index cfa8f4fe..ca8b3b26 100644
--- a/examples/object_detection.py
+++ b/examples/object_detection.py
@@ -4,10 +4,10 @@
execution_path = os.getcwd()
detector = ObjectDetection()
-detector.setModelTypeAsYOLOv3()
-detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.setModelTypeAsRetinaNet()
+detector.setModelPath( os.path.join(execution_path , "retinanet_resnet50_fpn_coco-eeacb38b.pth")) # Download the model via this link https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth
detector.loadModel()
-detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new.jpg"), minimum_percentage_probability=30)
+detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "2.jpg"), output_image_path=os.path.join(execution_path , "2_detected.jpg"), minimum_percentage_probability=40)
for eachObject in detections:
print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
diff --git a/examples/video_analysis_per_frame.py b/examples/video_analysis_per_frame.py
index 88b4145b..90a5bf93 100644
--- a/examples/video_analysis_per_frame.py
+++ b/examples/video_analysis_per_frame.py
@@ -48,7 +48,7 @@ def forFrame(frame_number, output_array, output_count, returned_frame):
video_detector = VideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
-video_detector.setModelPath(os.path.join(execution_path, "yolo.h5"))
+video_detector.setModelPath(os.path.join(execution_path, "yolov3.pt")) # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
video_detector.loadModel()
plt.show()
diff --git a/examples/video_analysis_per_second.py b/examples/video_analysis_per_second.py
index d82f1419..7ea19fff 100644
--- a/examples/video_analysis_per_second.py
+++ b/examples/video_analysis_per_second.py
@@ -48,7 +48,7 @@ def forSecond(frame_number, output_arrays, count_arrays, average_count, returned
video_detector = VideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
-video_detector.setModelPath(os.path.join(execution_path, "yolo.h5"))
+video_detector.setModelPath(os.path.join(execution_path, "yolov3.pt")) # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
video_detector.loadModel()
plt.show()
diff --git a/examples/video_custom_object_detection.py b/examples/video_custom_object_detection.py
index d5d00543..09ca54f4 100644
--- a/examples/video_custom_object_detection.py
+++ b/examples/video_custom_object_detection.py
@@ -5,7 +5,7 @@
detector = VideoObjectDetection()
detector.setModelTypeAsYOLOv3()
-detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.setModelPath(os.path.join(execution_path, "yolov3.pt")) # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
detector.loadModel()
custom = detector.CustomObjects(person=True, motorcycle=True, bus=True)
diff --git a/examples/video_object_detection.py b/examples/video_object_detection.py
index 4bc0ba66..8e0ee058 100644
--- a/examples/video_object_detection.py
+++ b/examples/video_object_detection.py
@@ -5,7 +5,7 @@
detector = VideoObjectDetection()
detector.setModelTypeAsYOLOv3()
-detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.setModelPath(os.path.join(execution_path, "yolov3.pt")) # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
detector.loadModel()
video_path = detector.detectObjectsFromVideo(input_file_path=os.path.join(execution_path, "traffic.mp4"),
diff --git a/imageai/Classification/CUSTOMCLASSIFICATION.md b/imageai/Classification/CUSTOMCLASSIFICATION.md
new file mode 100644
index 00000000..3ba38565
--- /dev/null
+++ b/imageai/Classification/CUSTOMCLASSIFICATION.md
@@ -0,0 +1,106 @@
+# ImageAI : Custom Image Classification
+
+ImageAI provides 4 different algorithms and model types to perform custom image prediction using your custom models.
+You will be able to use your model trained with **ImageAI** and the corresponding model_class JSON file to predict custom objects
+that you have trained the model on.
+
+### TABLE OF CONTENTS
+
+- :white_square_button: Custom Model Prediction
+- :white_square_button: Custom Model Prediction with Full Model (NEW)
+
+### Custom Model Prediction
+
+
+In this example, we will be using a ResNet50 model trained on **IdenProf**, a dataset of uniformed professionals, which achieved 78.20% accuracy on the test dataset.
+(You can use your own trained model and generated JSON file. This class is provided mainly so you can use your own custom models.)
+Download the ResNet50 model and the JSON file via the links below:
+
+- [**ResNet50**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-idenprof-test_acc_0.78200_epoch-91.pt) _(Size = 90.4 mb)_
+- [**idenprof_model_classes.json file**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof_model_classes.json)
+
+Great!
+Once you have downloaded this model file and the JSON file, start a new python project, and then copy the model file and the JSON file to your project folder where your python files (.py files) will be.
+Download the image below, or take any image on your computer that include any of the following professionals(Chef, Doctor, Engineer, Farmer, Fireman, Judge, Mechanic, Pilot, Police and Waiter) and copy it to your python project's folder.
+Then create a python file and give it a name; an example is **FirstCustomPrediction.py**.
+Then write the code below into the python file:
+
+### FirstCustomPrediction.py
+
+```python
+from imageai.Classification.Custom import CustomImageClassification
+import os
+
+execution_path = os.getcwd()
+
+prediction = CustomImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath(os.path.join(execution_path, "resnet50-idenprof-test_acc_0.78200_epoch-91.pt"))
+prediction.setJsonPath(os.path.join(execution_path, "idenprof_model_classes.json"))
+prediction.loadModel(num_objects=10)
+
+predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "4.jpg"), result_count=5)
+
+for eachPrediction, eachProbability in zip(predictions, probabilities):
+ print(eachPrediction, " : ", eachProbability)
+```
+
+**Sample Result:**
+
+
+```
+mechanic : 76.82620286941528
+chef : 10.106072574853897
+waiter : 4.036874696612358
+police : 2.6663416996598244
+pilot : 2.239348366856575
+```
+
+The code above works as follows:
+```python
+from imageai.Classification.Custom import CustomImageClassification
+import os
+```
+The code above imports the **ImageAI** library for custom image prediction and the python **os** class.
+
+```python
+execution_path = os.getcwd()
+```
+
+The above line obtains the path to the folder that contains your python file (in this example, your FirstCustomPrediction.py).
+
+```python
+prediction = CustomImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath(os.path.join(execution_path, "resnet50-idenprof-test_acc_0.78200_epoch-91.pt"))
+prediction.setJsonPath(os.path.join(execution_path, "idenprof_model_classes.json"))
+prediction.loadModel(num_objects=10)
+```
+
+In the lines above, we created an instance of the `CustomImageClassification()`
+ class in the first line, then we set the model type of the prediction object to ResNet50 by calling the `.setModelTypeAsResNet50()`
+ method in the second line, we set the model path of the prediction object to the path of the custom model file (`resnet50-idenprof-test_acc_0.78200_epoch-91.pt`) we copied to the python file folder
+ in the third line, we set the path to the idenprof_model_classes.json file of the model, then we load the model and pass in the number of object classes that can be predicted by the model.
+
+```python
+predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "4.jpg"), result_count=5)
+```
+
+In the above line, we defined 2 variables to be equal to the function called to predict an image, which is the `.classifyImage()` function, into which we passed the path to our image and also stated the number of prediction results we want to have (values from 1 to 10 in this case) by passing `result_count=5`. The `.classifyImage()` function will return 2 array objects with the first (**predictions**) being an array of predictions and the second (**percentage_probabilities**) being an array of the corresponding percentage probability for each prediction.
+
+```python
+for eachPrediction, eachProbability in zip(predictions, probabilities):
+ print(eachPrediction, " : ", eachProbability)
+```
+
+The above line obtains each object in the **predictions** array, and also obtains the corresponding percentage probability from the **percentage_probabilities**, and finally prints the result of both to console.
+
+**CustomImageClassification** class also supports the multiple predictions, input types and prediction speeds that are contained
+in the **ImageClassification** class. Follow this [link](README.md) to see all the details.
+
+
+### Documentation
+
+We have provided full documentation for all **ImageAI** classes and functions. Find links below:
+
+* Documentation - **English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
diff --git a/imageai/Classification/CUSTOMTRAINING.md b/imageai/Classification/CUSTOMTRAINING.md
new file mode 100644
index 00000000..2b3cb949
--- /dev/null
+++ b/imageai/Classification/CUSTOMTRAINING.md
@@ -0,0 +1,144 @@
+# ImageAI : Custom Prediction Model Training
+
+## ---------------------------------------------------
+## Introducing Jarvis and TheiaEngine.
+
+We the creators of ImageAI are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers.
+
+
+[](https://jarvis.genxr.co)
+
+Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.
+
+
+Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.
+
+
+[](https://www.genxr.co/theia-engine)
+
+
+[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
+- **Detect 300+ objects** ( 220 more objects than ImageAI)
+- **Provide answers to any content or context questions** asked on an image
+ - very useful to get information on any object, action or information without needing to train a new custom model for every tasks
+- **Generate scene description and summary**
+- **Convert 2D image to 3D pointcloud and triangular mesh**
+- **Semantic Scene mapping of objects, walls, floors, etc**
+- **Stateless Face recognition and emotion detection**
+- **Image generation and augmentation from prompt**
+- etc.
+
+Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
+## ---------------------------------------------------
+
+**ImageAI** provides the most simple and powerful approach to training custom image prediction models
+using state-of-the-art SqueezeNet, ResNet50, InceptionV3 and DenseNet
+which you can load into the `imageai.Classification.Custom.CustomImageClassification` class. This allows
+ you to train your own model on any set of images that corresponds to any type of objects/persons.
+The training process generates a JSON file that maps the objects types in your image dataset
+and creates lots of models. You will then pick the model with the highest accuracy and perform custom
+image prediction using the model and the JSON file generated.
+
+### TABLE OF CONTENTS
+- :white_square_button: Custom Model Training Prediction
+- :white_square_button: Saving Full Custom Model
+- :white_square_button: Training on the IdenProf Dataset
+- :white_square_button: Continuous Model Training
+- :white_square_button: Transfer Learning (Training from a pre-trained model)
+
+
+### Custom Model Training
+
+
+Because model training is a compute intensive task, we strongly advise that you perform this experiment using a computer with an NVIDIA GPU and a CUDA-enabled PyTorch installation. Performing model training on CPU may take hours or days. With an NVIDIA GPU powered computer system, this will take a few hours. You can use Google Colab for this experiment as it has an NVIDIA K80 GPU available.
+
+To train a custom prediction model, you need to prepare the images you want to use to train the model.
+You will prepare the images as follows:
+
+1. Create a dataset folder with the name you will like your dataset to be called (e.g pets)
+2. In the dataset folder, create a folder by the name **train**
+3. In the dataset folder, create a folder by the name **test**
+4. In the train folder, create a folder for each object you want the model to predict and give the folder a name that corresponds to the respective object name (e.g dog, cat, squirrel, snake)
+5. In the test folder, create a folder for each object you want the model to predict and give
+ the folder a name that corresponds to the respective object name (e.g dog, cat, squirrel, snake)
+6. In each folder present in the train folder, put the images of each object in its respective folder. These images are the ones to be used to train the model. To produce a model that can perform well in practical applications, I recommend about 500 or more images per object. 1000 images per object is just great
+7. In each folder present in the test folder, put about 100 to 200 images of each object in its respective folder. These images are the ones to be used to test the model as it trains
+8. Once you have done this, the structure of your image dataset folder should look like below:
+ ```
+ pets//train//dog//dog-train-images
+ pets//train//cat//cat-train-images
+ pets//train//squirrel//squirrel-train-images
+ pets//train//snake//snake-train-images
+ pets//test//dog//dog-test-images
+ pets//test//cat//cat-test-images
+ pets//test//squirrel//squirrel-test-images
+ pets//test//snake//snake-test-images
+ ```
+9. Then your training code goes as follows:
+ ```python
+ from imageai.Classification.Custom import ClassificationModelTrainer
+ model_trainer = ClassificationModelTrainer()
+ model_trainer.setModelTypeAsResNet50()
+ model_trainer.setDataDirectory("pets")
+ model_trainer.trainModel(num_experiments=100, batch_size=32)
+ ```
+
+ Yes! Just 5 lines of code and you can train any of the available 4 state-of-the-art Deep Learning algorithms on your custom dataset.
+Now lets take a look at how the code above works.
+
+```python
+from imageai.Classification.Custom import ClassificationModelTrainer
+model_trainer = ClassificationModelTrainer()
+model_trainer.setModelTypeAsResNet50()
+model_trainer.setDataDirectory("pets")
+```
+
+In the first line, we import the **ImageAI** model training class, then we define the model trainer in the second line,
+ we set the network type in the third line and set the path to the image dataset we want to train the network on.
+
+```python
+model_trainer.trainModel(num_experiments=100, batch_size=32)
+```
+
+In the code above, we start the training process. The parameters stated in the function are as below:
+- **num_experiments** : this is to state the number of times the network will train over all the training images,
+ which is also called epochs
+- **batch_size** : This is to state the number of images the network will process at once. The images
+ are processed in batches until they are exhausted per each experiment performed.
+
+
+When you start the training, you should see something like this in the console:
+
+```
+==================================================
+Training with GPU
+==================================================
+Epoch 1/100
+----------
+100%|█████████████████████████████████████████████████████████████████████████████████| 282/282 [02:15<00:00, 2.08it/s]
+train Loss: 3.8062 Accuracy: 0.1178
+100%|███████████████████████████████████████████████████████████████████████████████████| 63/63 [00:26<00:00, 2.36it/s]
+test Loss: 2.2829 Accuracy: 0.1215
+Epoch 2/100
+----------
+100%|█████████████████████████████████████████████████████████████████████████████████| 282/282 [01:57<00:00, 2.40it/s]
+train Loss: 2.2682 Accuracy: 0.1303
+100%|███████████████████████████████████████████████████████████████████████████████████| 63/63 [00:20<00:00, 3.07it/s]
+test Loss: 2.2388 Accuracy: 0.1470
+```
+
+Let us explain the details shown above:
+1. The line **Epoch 1/100** means the network is training the first experiment of the targeted 100
+2. The line `282/282 [02:15<00:00,  2.08it/s]` represents the number of batches that have been trained in the present experiment
+3. The best model is automatically saved to the `models/` sub-directory of your dataset directory
+
+ Once you are done training your custom model, you can use the "CustomImageClassification" class to perform image prediction with your model. Simply follow the link below.
+[imageai/Classification/CUSTOMCLASSIFICATION.md](https://github.com/OlafenwaMoses/ImageAI/blob/master/imageai/Classification/CUSTOMCLASSIFICATION.md)
+
+
+
+### Documentation
+
+We have provided full documentation for all **ImageAI** classes and functions. Find links below:
+
+* Documentation - **English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
diff --git a/imageai/Classification/Custom/__init__.py b/imageai/Classification/Custom/__init__.py
new file mode 100644
index 00000000..c8ad783e
--- /dev/null
+++ b/imageai/Classification/Custom/__init__.py
@@ -0,0 +1,560 @@
+import time, warnings
+import os
+import copy
+import re
+import json
+from typing import List, Tuple, Union
+from PIL import Image
+import numpy as np
+
+import torch
+import torch.nn as nn
+from torch.optim import lr_scheduler
+from torchvision import datasets
+from torchvision import transforms
+from torchvision.models import mobilenet_v2, inception_v3, resnet50, densenet121
+from torchvision.models.inception import InceptionOutputs
+
+from .data_transformation import data_transforms1, data_transforms2
+from .training_params import resnet50_train_params, densenet121_train_params, inception_v3_train_params, mobilenet_v2_train_params
+from tqdm import tqdm
+
+from ...backend_check.model_extension import extension_check
+
+
+
+class ClassificationModelTrainer():
+ """
+ This is the Classification Model training class, that allows you to define a deep learning network
+ from the 4 available network types supported by ImageAI which are MobileNetV2, ResNet50,
+ InceptionV3 and DenseNet121 and then train on custom image data.
+ """
+
+ def __init__(self) -> None:
+ self.__model_type = ""
+ # Train on GPU when CUDA is available, otherwise fall back to CPU.
+ self.__device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.__data_dir = ""
+ self.__data_loaders = None
+ self.__class_names = None
+ self.__dataset_sizes = None
+ self.__dataset_name = ""
+ self.__model = None
+ self.__optimizer = None
+ self.__lr_scheduler = None
+ self.__loss_fn = nn.CrossEntropyLoss()
+ # "fine_tune_all" trains every layer; "freeze_all" (set via
+ # freezeAllLayers()) freezes the pretrained layers so only the
+ # replaced final layer is trained.
+ self.__transfer_learning_mode = "fine_tune_all"
+ self.__model_path = ""
+ self.__training_params = None
+
+ def __set_training_param(self) -> None:
+ # Builds the model, optimizer and (optional) LR scheduler from the
+ # per-architecture parameters selected by the setModelTypeAs*() call.
+ if not self.__model_type:
+ raise RuntimeError("The model type is not set!!!")
+ self.__model = self.__training_params["model"]
+ optimizer = self.__training_params["optimizer"]
+ lr_decay_rate = self.__training_params["lr_decay_rate"]
+ lr_step_size = self.__training_params["lr_step_size"]
+ lr = self.__training_params["lr"]
+ weight_decay = self.__training_params["weight_decay"]
+
+ if self.__model_path:
+ # A pretrained checkpoint was supplied: load it (and optionally
+ # freeze layers) before replacing the classification head below.
+ self.__set_transfer_learning_mode()
+ print("==> Transfer learning enabled")
+
+ # change the last linear layer to have output features of
+ # same size as the number of unique classes in the new
+ # dataset.
+ # NOTE: the replacement layer is created after any freezing, so it
+ # is always trainable regardless of the transfer-learning mode.
+ if self.__model_type == "mobilenet_v2":
+ in_features = self.__model.classifier[1].in_features
+ self.__model.classifier[1] = nn.Linear(in_features, len(self.__class_names))
+ elif self.__model_type == "densenet121":
+ in_features = self.__model.classifier.in_features
+ self.__model.classifier = nn.Linear(in_features, len(self.__class_names))
+ else:
+ # resnet50 and inception_v3 both expose the head as `.fc`.
+ in_features = self.__model.fc.in_features
+ self.__model.fc = nn.Linear(in_features, len(self.__class_names))
+
+ self.__model.to(self.__device)
+ # All supported configurations use an SGD-style optimizer that
+ # accepts a `momentum` argument (see training_params.py).
+ self.__optimizer = optimizer(
+ self.__model.parameters(),
+ lr=lr,
+ momentum=0.9,
+ weight_decay=weight_decay
+ )
+ if lr_decay_rate and lr_step_size:
+ self.__lr_scheduler = lr_scheduler.StepLR(
+ self.__optimizer,
+ gamma=lr_decay_rate,
+ step_size=lr_step_size
+ )
+
+ def __set_transfer_learning_mode(self) -> None:
+ # Loads the checkpoint at self.__model_path into the current model
+ # and applies the configured transfer-learning mode.
+
+ state_dict = torch.load(self.__model_path)
+ if self.__model_type == "densenet121":
+ # '.'s are no longer allowed in module names, but previous densenet layers
+ # as provided by the pytorch organization has names that uses '.'s.
+ pattern = re.compile(
+ r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\."
+ "(?:weight|bias|running_mean|running_var))$"
+ )
+ for key in list(state_dict.keys()):
+ res = pattern.match(key)
+ if res:
+ new_key = res.group(1) + res.group(2)
+ state_dict[new_key] = state_dict[key]
+ del state_dict[key]
+
+ self.__model.load_state_dict(state_dict)
+ self.__model.to(self.__device)
+
+ if self.__transfer_learning_mode == "freeze_all":
+ # Freeze every pretrained parameter; the classification head is
+ # replaced afterwards in __set_training_param() and stays trainable.
+ for param in self.__model.parameters():
+ param.requires_grad = False
+
+ def __load_data(self, batch_size : int = 8) -> None:
+ # Builds train/test ImageFolder datasets and DataLoaders from
+ # self.__data_dir. InceptionV3 needs 299x299 inputs, hence its own
+ # transform set (data_transforms2); all other models use 224x224.
+
+ if not self.__data_dir:
+ raise RuntimeError("The dataset directory not yet set.")
+ image_dataset = {
+ x:datasets.ImageFolder(
+ os.path.join(self.__data_dir, x),
+ data_transforms2[x] if self.__model_type=="inception_v3" else data_transforms1[x]
+ )
+ for x in ["train", "test"]
+ }
+ self.__data_loaders = {
+ x:torch.utils.data.DataLoader(
+ image_dataset[x], batch_size=batch_size,
+ shuffle=True
+ )
+ for x in ["train", "test"]
+ }
+ self.__dataset_sizes = {x:len(image_dataset[x]) for x in ["train", "test"]}
+ # ImageFolder lists class folder names in sorted order.
+ self.__class_names = image_dataset["train"].classes
+ # Dataset name (used in saved checkpoint/json file names) is the
+ # basename of the data directory.
+ self.__dataset_name = os.path.basename(self.__data_dir.rstrip(os.path.sep))
+
+ def setDataDirectory(self, data_directory : str = "") -> None:
+ """
+ Sets the directory that contains the training and test dataset. The data directory should contain 'train' and 'test' subdirectories
+ for the training and test datasets.
+
+ In each of these subdirectories, each object must have a dedicated folder and the folder containing images for the object.
+
+ The structure of the 'test' and 'train' folder must be as follows:
+
+ >> train >> class1 >> class1_train_images
+ >> class2 >> class2_train_images
+ >> class3 >> class3_train_images
+ >> class4 >> class4_train_images
+ >> class5 >> class5_train_images
+ >> test >> class1 >> class1_test_images
+ >> class2 >> class2_test_images
+ >> class3 >> class3_test_images
+ >> class4 >> class4_test_images
+ >> class5 >> class5_test_images
+
+ :raises ValueError: if `data_directory` is not an existing directory.
+ """
+ if os.path.isdir(data_directory):
+ self.__data_dir = data_directory
+ return
+ raise ValueError("expected a path to a directory")
+
+ def setModelTypeAsMobileNetV2(self) -> None:
+ """
+ 'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model.
+ :return:
+ """
+ self.__model_type = "mobilenet_v2"
+ self.__training_params = mobilenet_v2_train_params()
+
+ def setModelTypeAsResNet50(self) -> None:
+ """
+ 'setModelTypeAsResNet50()' is used to set the model type to the ResNet50 model.
+ :return:
+ """
+ self.__model_type = "resnet50"
+ self.__training_params = resnet50_train_params()
+
+ def setModelTypeAsInceptionV3(self) -> None:
+ """
+ 'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model.
+ :return:
+ """
+ self.__model_type = "inception_v3"
+ self.__training_params = inception_v3_train_params()
+
+ def setModelTypeAsDenseNet121(self) -> None:
+ """
+ 'setModelTypeAsDenseNet121()' is used to set the model type to the DenseNet121 model.
+ :return:
+ """
+ self.__model_type = "densenet121"
+ self.__training_params = densenet121_train_params()
+
+ def freezeAllLayers(self) -> None:
+ """
+ Set the transfer learning mode to freeze all layers.
+
+ NOTE: The last layer (fully connected layer) is trainable.
+ """
+ self.__transfer_learning_mode = "freeze_all"
+
+ def fineTuneAllLayers(self) -> None:
+ """
+ Sets the transfer learning mode to fine-tune the pretrained weights
+ """
+ self.__transfer_learning_mode = "fine_tune_all"
+
+ def trainModel(
+ self,
+ num_experiments : int = 100,
+ batch_size : int = 8,
+ model_directory : str = None,
+ transfer_from_model: str = None,
+ verbose : bool = True
+ ) -> None:
+
+ """
+ 'trainModel()' function starts the model actual training. It accepts the following values:
+ - num_experiments: Also known as epochs, is the number of times the network will process all the images in the training dataset
+ - batch_size: The number of image data that will be loaded into memory at once during training
+ - model_directory: Location where json mapping and trained models will be saved
+ - transfer_from_model: Path to a pre-trained imagenet model that corresponds to the training model type
+ - verbose: Option to enable/disable training logs
+
+ :param num_experiments:
+ :param batch_size:
+ :model_directory:
+ :transfer_from_model:
+ :verbose:
+ :return:
+ """
+
+ # Load dataset
+ self.__load_data(batch_size)
+
+ # Check and effect transfer learning if enabled
+ if transfer_from_model:
+ extension_check(transfer_from_model)
+ self.__model_path = transfer_from_model
+
+ # Load training parameters for the specified model type
+ self.__set_training_param()
+
+
+ # Create output directory to save trained models and json mappings
+ if not model_directory:
+ model_directory = os.path.join(self.__data_dir, "models")
+
+ if not os.path.exists(model_directory):
+ # NOTE(review): os.mkdir raises if the parent directory is
+ # missing; os.makedirs(..., exist_ok=True) would be more robust.
+ os.mkdir(model_directory)
+
+ # Dump class mappings to json file
+ # (index -> class name; ImageFolder classes are already sorted, so
+ # sorting again keeps the mapping stable).
+ with open(os.path.join(model_directory, f"{self.__dataset_name}_model_classes.json"), "w") as f:
+ classes_dict = {}
+ class_list = sorted(self.__class_names)
+ for i in range(len(class_list)):
+ classes_dict[str(i)] = class_list[i]
+ json.dump(classes_dict, f)
+
+ # Prep model weights for training
+ since = time.time()
+
+ best_model_weights = copy.deepcopy(self.__model.state_dict())
+ best_acc = 0.0
+ prev_save_name, recent_save_name = "", ""
+
+ # Device check and log
+ print("=" * 50)
+ print("Training with GPU") if self.__device == "cuda" else print("Training with CPU. This might cause slower train.")
+ print("=" * 50)
+
+
+ for epoch in range(num_experiments):
+ if verbose:
+ print(f"Epoch {epoch + 1}/{num_experiments}", "-"*10, sep="\n")
+
+ # each epoch has a training and test phase
+ for phase in ["train", "test"]:
+ if phase == "train":
+ self.__model.train()
+ else:
+ self.__model.eval()
+
+ running_loss = 0.0
+ running_corrects = 0
+
+ # Iterate on the dataset in batches
+ for imgs, labels in tqdm(self.__data_loaders[phase]):
+ imgs = imgs.to(self.__device)
+ labels = labels.to(self.__device)
+
+ self.__optimizer.zero_grad()
+
+ # Gradients only needed during the training phase.
+ with torch.set_grad_enabled(phase == "train"):
+ output = self.__model(imgs)
+ # InceptionV3 in train mode returns (logits, aux_logits);
+ # keep only the main logits (the aux loss is not used).
+ if self.__model_type == "inception_v3" and type(output) == InceptionOutputs:
+ output = output[0]
+ _, preds = torch.max(output, 1)
+ loss = self.__loss_fn(output, labels)
+
+ if phase=="train":
+ loss.backward()
+ self.__optimizer.step()
+ # Scale per-batch mean loss back to a sum over samples.
+ running_loss += loss.item() * imgs.size(0)
+ running_corrects += torch.sum(preds==labels.data)
+
+ # Compute accuracy and loss metrics post epoch training
+ # (LR schedule is stepped once per training epoch, and only
+ # for model types that configure one).
+ if phase == "train" and isinstance(self.__lr_scheduler, torch.optim.lr_scheduler.StepLR):
+ self.__lr_scheduler.step()
+
+ epoch_loss = running_loss / self.__dataset_sizes[phase]
+ epoch_acc = running_corrects.double() / self.__dataset_sizes[phase]
+
+ if verbose:
+ print(f"{phase} Loss: {epoch_loss:.4f} Accuracy: {epoch_acc:.4f}")
+ # Keep only the best checkpoint: delete the previous best
+ # before saving the new one.
+ if phase == "test" and epoch_acc > best_acc:
+ best_acc = epoch_acc
+ recent_save_name = self.__model_type+f"-{self.__dataset_name}-test_acc_{best_acc:.5f}_epoch-{epoch}.pt"
+ if prev_save_name:
+ os.remove(os.path.join(model_directory, prev_save_name))
+ best_model_weights = copy.deepcopy(self.__model.state_dict())
+ torch.save(
+ best_model_weights, os.path.join(model_directory, recent_save_name)
+ )
+ prev_save_name = recent_save_name
+
+
+ time_elapsed = time.time() - since
+ print(f"Training completed in {time_elapsed//60:.0f}m {time_elapsed % 60:.0f}s")
+ print(f"Best test accuracy: {best_acc:.4f}")
+
+
+class CustomImageClassification:
+ """
+ An implementation that allows for easy classification of images
+ using the state of the art computer vision classification model
+ trained on custom data.
+
+ The class provides 4 different classification models which are ResNet50, DenseNet121, InceptionV3 and MobileNetV2.
+
+ The following functions are required to be called before a classification can be made
+
+ * At least one of the following and it must correspond to the model set in the setModelPath()
+ [setModelTypeAsMobileNetV2(), setModelTypeAsResNet50(), setModelTypeAsDenseNet121(), setModelTypeAsInceptionV3()]
+
+ * setModelPath: This is used to specify the absolute path to the trained model file.
+
+ * setJsonPath: This is used to specify the absolute path to the
+ json file saved during the training of the custom model.
+
+ * useCPU (Optional): If you will like to force the image classification to be performed on CPU, call this function.
+
+ * loadModel: Used to load the trained model weights and json data.
+
+ * classifyImage(): Used for classifying an image.
+ """
+ def __init__(self) -> None:
+ self.__model = None
+ self.__model_type = ""
+ self.__model_loaded = False
+ # Classify on GPU when CUDA is available; useCPU() forces CPU.
+ self.__device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.__json_path = None
+ self.__class_names = None
+ # NOTE(review): redundant — __model_loaded was already set above.
+ # Also note __model_path is only created in setModelPath(); calling
+ # loadModel() first fails on the missing attribute (caught and
+ # re-raised there as a generic Exception).
+ self.__model_loaded = False
+
+ def __load_image(self, image_input: Union[str, np.ndarray, Image.Image]) -> torch.Tensor:
+ # Converts any supported input (file path, numpy array, PIL image)
+ # into a single-image batch tensor normalized with ImageNet stats.
+ # NOTE(review): inputs are always preprocessed at 224x224, even for
+ # inception_v3 which is trained at 299x299 — verify this is intended.
+ images = []
+ preprocess = transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ ])
+ if type(image_input) == str:
+ if os.path.isfile(image_input):
+ img = Image.open(image_input).convert("RGB")
+ images.append(preprocess(img))
+ else:
+ raise ValueError(f"image path '{image_input}' is not found or a valid file")
+ elif type(image_input) == np.ndarray:
+ img = Image.fromarray(image_input).convert("RGB")
+ images.append(preprocess(img))
+ elif "PIL" in str(type(image_input)):
+ img = image_input.convert("RGB")
+ images.append(preprocess(img))
+ else:
+ raise ValueError(f"Invalid image input format")
+
+ # Stack into a (1, 3, 224, 224) batch tensor.
+ return torch.stack(images)
+
+ def __load_classes(self):
+ # Loads the index -> class-name mapping produced during training;
+ # values are kept in key order (keys are "0", "1", ...).
+ if self.__json_path:
+ with open(self.__json_path, 'r') as f:
+ self.__class_names = list(json.load(f).values())
+ else:
+ raise ValueError("Invalid json path. Set a valid json mapping path by calling the 'setJsonPath()' function")
+
+ def setModelPath(self, path : str) -> None:
+ """
+ Sets the path to the trained model weight file.
+
+ :raises ValueError: if `path` is not an existing file.
+ """
+ if os.path.isfile(path):
+ extension_check(path)
+ self.__model_path = path
+ # Force a reload on the next loadModel() call.
+ self.__model_loaded = False
+ else:
+ raise ValueError(
+ f"The path '{path}' isn't a valid file. Ensure you specify the path to a valid trained model file."
+ )
+
+ def setJsonPath(self, path : str) -> None:
+ """
+ Sets the path to the JSON class-mapping file generated during training.
+
+ :raises ValueError: if `path` is not an existing file.
+ """
+ if os.path.isfile(path):
+ self.__json_path = path
+ else:
+ raise ValueError(
+ "parameter path should be a valid path to the json mapping file."
+ )
+
+ def setModelTypeAsMobileNetV2(self) -> None:
+ """
+ 'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model.
+ :return:
+ """
+ self.__model_type = "mobilenet_v2"
+
+ def setModelTypeAsResNet50(self) -> None:
+ """
+ 'setModelTypeAsResNet50()' is used to set the model type to the ResNet50 model.
+ :return:
+ """
+ self.__model_type = "resnet50"
+
+ def setModelTypeAsInceptionV3(self) -> None:
+ """
+ 'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model.
+ :return:
+ """
+ self.__model_type = "inception_v3"
+
+ def setModelTypeAsDenseNet121(self) -> None:
+ """
+ 'setModelTypeAsDenseNet121()' is used to set the model type to the DenseNet121 model.
+ :return:
+ """
+ self.__model_type = "densenet121"
+
+ def useCPU(self):
+ """
+ Used to force classification to be done on CPU.
+ By default, classification will occur on GPU compute if available else CPU compute.
+ """
+ self.__device = "cpu"
+ # If a model was already loaded on GPU, reload it on CPU.
+ if self.__model_loaded:
+ self.__model_loaded = False
+ self.loadModel()
+
+ def loadModel(self) -> None:
+ """
+ 'loadModel()' function is used to load the model weights into the model architecture from the file path defined
+ in the setModelPath() function.
+ :return:
+ """
+ if not self.__model_loaded:
+ self.__load_classes()
+ try:
+ # change the last layer of the networks to conform to the number
+ # of unique classes in the custom dataset used to train the custom
+ # model
+
+ if self.__model_type == "resnet50":
+ self.__model = resnet50(pretrained=False)
+ in_features = self.__model.fc.in_features
+ self.__model.fc = nn.Linear(in_features, len(self.__class_names))
+ elif self.__model_type == "mobilenet_v2":
+ self.__model = mobilenet_v2(pretrained=False)
+ in_features = self.__model.classifier[1].in_features
+ self.__model.classifier[1] = nn.Linear(in_features, len(self.__class_names))
+ elif self.__model_type == "inception_v3":
+ self.__model = inception_v3(pretrained=False)
+ in_features = self.__model.fc.in_features
+ self.__model.fc = nn.Linear(in_features, len(self.__class_names))
+ elif self.__model_type == "densenet121":
+ self.__model = densenet121(pretrained=False)
+ in_features = self.__model.classifier.in_features
+ self.__model.classifier = nn.Linear(in_features, len(self.__class_names))
+ else:
+ raise RuntimeError("Unknown model type.\nEnsure the model type is properly set.")
+
+ # map_location lets CUDA-saved checkpoints load on CPU.
+ state_dict = torch.load(self.__model_path, map_location=self.__device)
+
+ if self.__model_type == "densenet121":
+ # '.'s are no longer allowed in module names, but previous densenet layers
+ # as provided by the pytorch organization has names that uses '.'s.
+ pattern = re.compile(
+ r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\."
+ "(?:weight|bias|running_mean|running_var))$"
+ )
+ for key in list(state_dict.keys()):
+ res = pattern.match(key)
+ if res:
+ new_key = res.group(1) + res.group(2)
+ state_dict[new_key] = state_dict[key]
+ del state_dict[key]
+
+ self.__model.load_state_dict(state_dict)
+ # Inference mode: eval() disables dropout / batch-norm updates.
+ self.__model.to(self.__device).eval()
+ self.__model_loaded = True
+
+ except Exception as e:
+ # NOTE(review): the original exception is discarded, which hides
+ # the root cause; chaining with `from e` would aid debugging.
+ raise Exception("Weight loading failed.\nEnsure the model path is"
+ " set and the weight file is in the specified model path.")
+
+ def classifyImage(self, image_input: Union[str, np.ndarray, Image.Image], result_count: int) -> Tuple[List[str], List[float]]:
+ """
+ 'classifyImage()' function is used to classify a given image by receiving the following arguments:
+ * image_input: file path, numpy array or PIL image of the input image.
+ * result_count (optional) , the number of classifications to be sent which must be whole numbers between 1 and total number of classes the model is trained to classify.
+
+ This function returns 2 arrays namely 'classification_results' and 'classification_probabilities'. The 'classification_results'
+ contains possible objects classes arranged in descending of their percentage probabilities. The 'classification_probabilities'
+ contains the percentage probability of each object class. The position of each object class in the 'classification_results'
+ array corresponds with the positions of the percentage probability in the 'classification_probabilities' array.
+
+ :param image_input:
+ :param result_count:
+ :return classification_results, classification_probabilities:
+ :raises RuntimeError: if loadModel() has not been called.
+ """
+ if not self.__model_loaded:
+ raise RuntimeError(
+ "Model not yet loaded. You need to call '.loadModel()' before performing image classification"
+ )
+
+ images = self.__load_image(image_input)
+ images = images.to(self.__device)
+
+ with torch.no_grad():
+ output = self.__model(images)
+ probabilities = torch.softmax(output, dim=1)
+ # Top-N classes per image, sorted by descending probability.
+ topN_prob, topN_catid = torch.topk(probabilities, result_count)
+
+ predictions = [
+ [
+ (self.__class_names[topN_catid[i][j]], topN_prob[i][j].item()*100)
+ for j in range(topN_prob.shape[1])
+ ]
+ for i in range(topN_prob.shape[0])
+ ]
+
+ labels_pred = []
+ probabilities_pred = []
+
+ # Flatten the per-image predictions (batch size is always 1 here);
+ # probabilities are percentages rounded to 4 decimal places.
+ # NOTE(review): `idx` is unused.
+ for idx, pred in enumerate(predictions):
+ for label, score in pred:
+ labels_pred.append(label)
+ probabilities_pred.append(round(score, 4))
+
+ return labels_pred, probabilities_pred
\ No newline at end of file
diff --git a/imageai/Classification/Custom/data_transformation.py b/imageai/Classification/Custom/data_transformation.py
new file mode 100644
index 00000000..77938bda
--- /dev/null
+++ b/imageai/Classification/Custom/data_transformation.py
@@ -0,0 +1,43 @@
+from torchvision import transforms
+
+# Train/test preprocessing pipelines for 224x224 models
+# (MobileNetV2, ResNet50, DenseNet121). Normalization values are the
+# standard ImageNet channel means and standard deviations.
+data_transforms1 = {
+ "train":transforms.Compose([
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ transforms.Normalize(
+ [0.485, 0.456, 0.406],
+ [0.229, 0.224, 0.225]
+ )
+ ]),
+ "test": transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(
+ [0.485, 0.456, 0.406],
+ [0.229, 0.224, 0.225]
+ )
+ ])
+ }
+
+# Train/test preprocessing pipelines for InceptionV3, which expects
+# 299x299 inputs; same ImageNet normalization as above.
+data_transforms2 = {
+ "train":transforms.Compose([
+ transforms.RandomResizedCrop(299),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ transforms.Normalize(
+ [0.485, 0.456, 0.406],
+ [0.229, 0.224, 0.225]
+ )
+ ]),
+ "test": transforms.Compose([
+ transforms.Resize(299),
+ transforms.CenterCrop(299),
+ transforms.ToTensor(),
+ transforms.Normalize(
+ [0.485, 0.456, 0.406],
+ [0.229, 0.224, 0.225]
+ )
+ ])
+ }
diff --git a/imageai/Classification/Custom/training_params.py b/imageai/Classification/Custom/training_params.py
new file mode 100644
index 00000000..9f9139c2
--- /dev/null
+++ b/imageai/Classification/Custom/training_params.py
@@ -0,0 +1,53 @@
+import torch
+from torch.optim import SGD
+from torchvision.models import resnet50, inception_v3, mobilenet_v2, densenet121
+
+model = resnet50(pretrained=False)
+
+
+def resnet50_train_params():
+ model = resnet50(pretrained=False)
+ return {
+ "model": model,
+ "optimizer": SGD,
+ "weight_decay":1e-4,
+ "lr":0.1,
+ "lr_decay_rate": None,
+ "lr_step_size": None
+ }
+
+def inception_v3_train_params():
+ model = inception_v3(pretrained=False, init_weights=False)
+
+ return {
+ "model": model,
+ "optimizer": SGD,
+ "weight_decay":0,
+ "lr":0.045,
+ "lr_decay_rate": 0.94,
+ "lr_step_size":2
+ }
+
+def mobilenet_v2_train_params():
+ model = mobilenet_v2(pretrained=False)
+
+ return {
+ "model": model,
+ "optimizer": SGD,
+ "weight_decay":4e-5,
+ "lr":0.045,
+ "lr_decay_rate": 0.98,
+ "lr_step_size":1
+ }
+
+def densenet121_train_params():
+ model = densenet121(pretrained=False)
+
+ return {
+ "model": model,
+ "optimizer": SGD,
+ "weight_decay":1e-4,
+ "lr":0.1,
+ "lr_decay_rate": None,
+ "lr_step_size":None,
+ }
\ No newline at end of file
diff --git a/imageai/Classification/README.md b/imageai/Classification/README.md
new file mode 100644
index 00000000..7ff9a2cb
--- /dev/null
+++ b/imageai/Classification/README.md
@@ -0,0 +1,123 @@
+# ImageAI : Image Classification
+
+
+## ---------------------------------------------------
+## Introducing Jarvis and TheiaEngine.
+
+We the creators of ImageAI are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers.
+
+
+[](https://jarvis.genxr.co)
+
+Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.
+
+
+Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.
+
+
+[](https://www.genxr.co/theia-engine)
+
+
+[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
+- **Detect 300+ objects** ( 220 more objects than ImageAI)
+- **Provide answers to any content or context questions** asked on an image
+ - very useful to get information on any object, action or information without needing to train a new custom model for every tasks
+- **Generate scene description and summary**
+- **Convert 2D image to 3D pointcloud and triangular mesh**
+- **Semantic Scene mapping of objects, walls, floors, etc**
+- **Stateless Face recognition and emotion detection**
+- **Image generation and augmentation from prompt**
+- etc.
+
+Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
+## ---------------------------------------------------
+
+### TABLE OF CONTENTS
+- :white_square_button: First Prediction
+- :white_square_button: Documentation
+
+ImageAI provides 4 different algorithms and model types to perform image prediction.
+To perform image prediction on any picture, take the following simple steps. The 4 algorithms provided for
+ image prediction include **MobileNetV2**, **ResNet50**, **InceptionV3** and **DenseNet121**. Each of these
+ algorithms have individual model files which you must use depending on the choice of your algorithm. To download the
+ model file for your choice of algorithm, click on any of the links below:
+
+- **[MobileNetV2](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-b0353104.pth)** _(Size = 4.82 mb, fastest prediction time and moderate accuracy)_
+- **[ResNet50](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-19c8e357.pth)** by Microsoft Research _(Size = 98 mb, fast prediction time and high accuracy)_
+ - **[InceptionV3](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3_google-1a9a5a14.pth)** by Google Brain team _(Size = 91.6 mb, slow prediction time and higher accuracy)_
+ - **[DenseNet121](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-a639ec97.pth)** by Facebook AI Research _(Size = 31.6 mb, slower prediction time and highest accuracy)_
+
+ Great! Once you have downloaded this model file, start a new python project, and then copy the model file to your project
+ folder where your python files (.py files) will be . Download the image below, or take any image on your computer
+ and copy it to your python project's folder. Then create a python file and give it a name; an example is `FirstPrediction.py`.
+ Then write the code below into the python file:
+
+### FirstPrediction.py
+
+
+```python
+from imageai.Classification import ImageClassification
+import os
+
+execution_path = os.getcwd()
+
+prediction = ImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath(os.path.join(execution_path, "resnet50-19c8e357.pth"))
+prediction.loadModel()
+
+predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "1.jpg"), result_count=5 )
+for eachPrediction, eachProbability in zip(predictions, probabilities):
+ print(eachPrediction , " : " , eachProbability)
+```
+
+Sample Result:
+
+
+```
+convertible : 52.459555864334106
+sports_car : 37.61284649372101
+pickup : 3.1751200556755066
+car_wheel : 1.817505806684494
+minivan : 1.7487050965428352
+```
+
+The code above works as follows:
+```python
+from imageai.Classification import ImageClassification
+import os
+```
+The code above imports the `ImageAI` library and the python `os` class.
+```python
+execution_path = os.getcwd()
+```
+The above line obtains the path to the folder that contains your python file (in this example, your FirstPrediction.py).
+
+```python
+prediction = ImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath(os.path.join(execution_path, "resnet50-19c8e357.pth"))
+```
+In the lines above, we created an instance of the `ImageClassification()` class in the first line, then we set the model type of the prediction object to ResNet50 by calling `.setModelTypeAsResNet50()` in the second line and then we set the model path of the prediction object to the path of the model file (`resnet50-19c8e357.pth`) we copied to the python file folder in the third line.
+
+```python
+predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "1.jpg"), result_count=5 )
+```
+
+In the above line, we defined 2 variables to be equal to the function called to predict an image, which is the `.classifyImage()` function, into which we passed the path to our image and also stated the number of prediction results we want to have (values from 1 to 1000) by passing `result_count=5`. The `.classifyImage()` function will return 2 array objects with the first (**predictions**) being an array of predictions and the second (**percentage_probabilities**) being an array of the corresponding percentage probability for each prediction.
+
+```python
+for eachPrediction, eachProbability in zip(predictions, probabilities):
+ print(eachPrediction, " : " , eachProbability)
+```
+The above line obtains each object in the **predictions** array, and also obtains the corresponding percentage probability from the **percentage_probabilities**, and finally prints the result of both to console.
+
+
+
+
+### Documentation
+
+We have provided full documentation for all **ImageAI** classes and functions. Find links below:
+
+* Documentation - **English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
+
diff --git a/imageai/Classification/__init__.py b/imageai/Classification/__init__.py
new file mode 100644
index 00000000..1ff21348
--- /dev/null
+++ b/imageai/Classification/__init__.py
@@ -0,0 +1,242 @@
+import os, re
+from typing import Union
+from typing import List, Tuple
+import numpy as np
+import torch
+from torchvision.models import resnet50, densenet121, mobilenet_v2, inception_v3
+import torch.nn.functional as F
+from torchvision import transforms
+from PIL import Image
+import traceback
+from ..backend_check.model_extension import extension_check
+
+classification_models = {
+ "resnet50": {
+ "model": resnet50(pretrained=False)
+ },
+ "densenet121": {
+ "model": densenet121(pretrained=False)
+ },
+ "inceptionv3": {
+ "model": inception_v3(pretrained=False)
+ },
+ "mobilenetv2": {
+ "model": mobilenet_v2(pretrained=False)
+ }
+}
+
+class ImageClassification:
+ """
+ This is the image classification class in the ImageAI library. It allows you to classify objects into all the 1000 different classes in the ImageNet dataset [ https://www.kaggle.com/c/imagenet-object-localization-challenge/overview/description ].
+
+    The class provides 4 different classification models which are ResNet50, DenseNet121, InceptionV3 and MobileNetV2.
+
+ The following functions are required to be called before a classification can be made
+
+    * At least one of the following and it must correspond to the model set in the setModelPath()
+    [setModelTypeAsMobileNetV2(), setModelTypeAsResNet50(), setModelTypeAsDenseNet121(), setModelTypeAsInceptionV3()]
+
+ * setModelPath: This is used to specify the absolute path to a pretrained model file. Download any of the files in this release -> https://github.com/OlafenwaMoses/ImageAI/releases/tag/3.0.0-pretrained
+
+    * useCPU (Optional): If you would like to force the image classification to be performed on CPU, call this function.
+
+ * loadModel: Used to load the pretrained model weights
+
+ * classifyImage(): Used for classifying an image.
+
+ """
+ def __init__(self) -> None:
+ self.__model_type:str = None
+ self.__model:Union[resnet50, densenet121, mobilenet_v2, inception_v3] = None
+ self.__model_path: str = None
+ self.__classes_path: str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "imagenet_classes.txt")
+ self.__model_loaded: bool = False
+ self.__device: str = "cuda" if torch.cuda.is_available() else "cpu"
+ self.__classes: List[str] = []
+
+ def setModelPath(self, path: str):
+ """
+ 'setModelPath()' function is required and is used to set the file path to the model adopted from the list of the
+ available 4 model types. The model path must correspond to the model type set for the classification instance object.
+ :param model_path:
+ :return:
+ """
+ if os.path.isfile(path):
+ extension_check(path)
+ self.__model_path = path
+ else:
+ raise ValueError(
+ f"The path '{path}' isn't a valid file. Ensure you specify the path to a valid trained model file."
+ )
+
+ def __load_classes(self) -> List[str]:
+ with open(self.__classes_path) as f:
+ self.__classes = [c.strip() for c in f.readlines()]
+
+ def __load_image(self, image_input: Union[str, np.ndarray, Image.Image]) -> torch.Tensor:
+ images = []
+ preprocess = transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ ])
+ if type(image_input) == str:
+ if os.path.isfile(image_input):
+ img = Image.open(image_input).convert("RGB")
+ images.append(preprocess(img))
+ else:
+ raise ValueError(f"image path '{image_input}' is not found or a valid file")
+ elif type(image_input) == np.ndarray:
+ img = Image.fromarray(image_input).convert("RGB")
+ images.append(preprocess(img))
+ elif "PIL" in str(type(image_input)):
+ img = image_input.convert("RGB")
+ images.append(preprocess(img))
+ else:
+ raise ValueError(f"Invalid image input format")
+
+ return torch.stack(images)
+
+ def setModelTypeAsResNet50(self):
+ """
+ 'setModelTypeAsResNet50()' is used to set the model type to the ResNet50 model.
+ :return:
+ """
+ if self.__model_type == None:
+ self.__model_type = "resnet50"
+
+ def setModelTypeAsDenseNet121(self):
+ """
+ 'setModelTypeAsDenseNet121()' is used to set the model type to the DenseNet121 model.
+ :return:
+ """
+ if self.__model_type == None:
+ self.__model_type = "densenet121"
+
+ def setModelTypeAsInceptionV3(self):
+ """
+ 'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model.
+ :return:
+ """
+ if self.__model_type == None:
+ self.__model_type = "inceptionv3"
+
+ def setModelTypeAsMobileNetV2(self):
+ """
+ 'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model.
+ :return:
+ """
+ if self.__model_type == None:
+ self.__model_type = "mobilenetv2"
+
+ def useCPU(self):
+ """
+ Used to force classification to be done on CPU.
+ By default, classification will occur on GPU compute if available else CPU compute.
+ """
+ self.__device = "cpu"
+ if self.__model_loaded:
+ self.__model_loaded = False
+ self.loadModel()
+
+ def loadModel(self):
+ """
+ 'loadModel()' function is used to load the model weights into the model architecture from the file path defined
+ in the setModelPath() function.
+ :return:
+ """
+ if not self.__model_loaded:
+ try:
+ if self.__model_path == None:
+ raise ValueError(
+ "Model path not specified. Call '.setModelPath()' and parse the path to the model file before loading the model."
+ )
+
+ if self.__model_type in classification_models.keys():
+ self.__model = classification_models[self.__model_type]["model"]
+ else:
+ raise ValueError(
+ f"Model type '{self.__model_type}' not supported."
+ )
+ state_dict = torch.load(self.__model_path)
+ if self.__model_type == "densenet121":
+                    # '.'s are no longer allowed in module names, but previous densenet layers
+                    # as provided by PyTorch's model zoo have names that use '.'s.
+ pattern = re.compile(
+ r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\."
+ "(?:weight|bias|running_mean|running_var))$"
+ )
+ for key in list(state_dict.keys()):
+ res = pattern.match(key)
+ if res:
+ new_key = res.group(1) + res.group(2)
+ state_dict[new_key] = state_dict[key]
+ del state_dict[key]
+
+ self.__model.load_state_dict(
+ state_dict
+ )
+ self.__model.to(self.__device)
+ self.__model_loaded = True
+ self.__model.eval()
+ self.__load_classes()
+ except Exception:
+ print(traceback.print_exc())
+ print("Weight loading failed.\nEnsure the model path is"
+ " set and the weight file is in the specified model path.")
+
+
+
+ def classifyImage(self, image_input: Union[str, np.ndarray, Image.Image], result_count: int=5) -> Tuple[List[str], List[float]]:
+
+ """
+ 'classifyImage()' function is used to classify a given image by receiving the following arguments:
+ * image_input: file path, numpy array or PIL image of the input image.
+ * result_count (optional) , the number of classifications to be sent which must be whole numbers between
+ 1 and 1000. The default is 5.
+
+ This function returns 2 arrays namely 'classification_results' and 'classification_probabilities'. The 'classification_results'
+ contains possible objects classes arranged in descending of their percentage probabilities. The 'classification_probabilities'
+ contains the percentage probability of each object class. The position of each object class in the 'classification_results'
+ array corresponds with the positions of the percentage probability in the 'classification_probabilities' array.
+
+ :param image_input:
+ :param result_count:
+ :return classification_results, classification_probabilities:
+ """
+
+ if not self.__model_loaded:
+ raise RuntimeError(
+ "Model not yet loaded. You need to call '.loadModel()' before performing image classification"
+ )
+
+ images = self.__load_image(image_input)
+ images = images.to(self.__device)
+
+ with torch.no_grad():
+ output = self.__model(images)
+ probabilities = torch.softmax(output, dim=1)
+ topN_prob, topN_catid = torch.topk(probabilities, result_count)
+
+ predictions = [
+ [
+ (self.__classes[topN_catid[i][j]], topN_prob[i][j].item()*100)
+ for j in range(topN_prob.shape[1])
+ ]
+ for i in range(topN_prob.shape[0])
+ ]
+
+ labels_pred = []
+ probabilities_pred = []
+
+ for idx, pred in enumerate(predictions):
+ for label, score in pred:
+ labels_pred.append(label)
+ probabilities_pred.append(round(score, 4))
+
+ return labels_pred, probabilities_pred
+
+
+
+
\ No newline at end of file
diff --git a/imageai/Classification/imagenet_classes.txt b/imageai/Classification/imagenet_classes.txt
new file mode 100644
index 00000000..888d6f51
--- /dev/null
+++ b/imageai/Classification/imagenet_classes.txt
@@ -0,0 +1,1000 @@
+tench
+goldfish
+great white shark
+tiger shark
+hammerhead
+electric ray
+stingray
+cock
+hen
+ostrich
+brambling
+goldfinch
+house finch
+junco
+indigo bunting
+robin
+bulbul
+jay
+magpie
+chickadee
+water ouzel
+kite
+bald eagle
+vulture
+great grey owl
+European fire salamander
+common newt
+eft
+spotted salamander
+axolotl
+bullfrog
+tree frog
+tailed frog
+loggerhead
+leatherback turtle
+mud turtle
+terrapin
+box turtle
+banded gecko
+common iguana
+American chameleon
+whiptail
+agama
+frilled lizard
+alligator lizard
+Gila monster
+green lizard
+African chameleon
+Komodo dragon
+African crocodile
+American alligator
+triceratops
+thunder snake
+ringneck snake
+hognose snake
+green snake
+king snake
+garter snake
+water snake
+vine snake
+night snake
+boa constrictor
+rock python
+Indian cobra
+green mamba
+sea snake
+horned viper
+diamondback
+sidewinder
+trilobite
+harvestman
+scorpion
+black and gold garden spider
+barn spider
+garden spider
+black widow
+tarantula
+wolf spider
+tick
+centipede
+black grouse
+ptarmigan
+ruffed grouse
+prairie chicken
+peacock
+quail
+partridge
+African grey
+macaw
+sulphur-crested cockatoo
+lorikeet
+coucal
+bee eater
+hornbill
+hummingbird
+jacamar
+toucan
+drake
+red-breasted merganser
+goose
+black swan
+tusker
+echidna
+platypus
+wallaby
+koala
+wombat
+jellyfish
+sea anemone
+brain coral
+flatworm
+nematode
+conch
+snail
+slug
+sea slug
+chiton
+chambered nautilus
+Dungeness crab
+rock crab
+fiddler crab
+king crab
+American lobster
+spiny lobster
+crayfish
+hermit crab
+isopod
+white stork
+black stork
+spoonbill
+flamingo
+little blue heron
+American egret
+bittern
+crane
+limpkin
+European gallinule
+American coot
+bustard
+ruddy turnstone
+red-backed sandpiper
+redshank
+dowitcher
+oystercatcher
+pelican
+king penguin
+albatross
+grey whale
+killer whale
+dugong
+sea lion
+Chihuahua
+Japanese spaniel
+Maltese dog
+Pekinese
+Shih-Tzu
+Blenheim spaniel
+papillon
+toy terrier
+Rhodesian ridgeback
+Afghan hound
+basset
+beagle
+bloodhound
+bluetick
+black-and-tan coonhound
+Walker hound
+English foxhound
+redbone
+borzoi
+Irish wolfhound
+Italian greyhound
+whippet
+Ibizan hound
+Norwegian elkhound
+otterhound
+Saluki
+Scottish deerhound
+Weimaraner
+Staffordshire bullterrier
+American Staffordshire terrier
+Bedlington terrier
+Border terrier
+Kerry blue terrier
+Irish terrier
+Norfolk terrier
+Norwich terrier
+Yorkshire terrier
+wire-haired fox terrier
+Lakeland terrier
+Sealyham terrier
+Airedale
+cairn
+Australian terrier
+Dandie Dinmont
+Boston bull
+miniature schnauzer
+giant schnauzer
+standard schnauzer
+Scotch terrier
+Tibetan terrier
+silky terrier
+soft-coated wheaten terrier
+West Highland white terrier
+Lhasa
+flat-coated retriever
+curly-coated retriever
+golden retriever
+Labrador retriever
+Chesapeake Bay retriever
+German short-haired pointer
+vizsla
+English setter
+Irish setter
+Gordon setter
+Brittany spaniel
+clumber
+English springer
+Welsh springer spaniel
+cocker spaniel
+Sussex spaniel
+Irish water spaniel
+kuvasz
+schipperke
+groenendael
+malinois
+briard
+kelpie
+komondor
+Old English sheepdog
+Shetland sheepdog
+collie
+Border collie
+Bouvier des Flandres
+Rottweiler
+German shepherd
+Doberman
+miniature pinscher
+Greater Swiss Mountain dog
+Bernese mountain dog
+Appenzeller
+EntleBucher
+boxer
+bull mastiff
+Tibetan mastiff
+French bulldog
+Great Dane
+Saint Bernard
+Eskimo dog
+malamute
+Siberian husky
+dalmatian
+affenpinscher
+basenji
+pug
+Leonberg
+Newfoundland
+Great Pyrenees
+Samoyed
+Pomeranian
+chow
+keeshond
+Brabancon griffon
+Pembroke
+Cardigan
+toy poodle
+miniature poodle
+standard poodle
+Mexican hairless
+timber wolf
+white wolf
+red wolf
+coyote
+dingo
+dhole
+African hunting dog
+hyena
+red fox
+kit fox
+Arctic fox
+grey fox
+tabby
+tiger cat
+Persian cat
+Siamese cat
+Egyptian cat
+cougar
+lynx
+leopard
+snow leopard
+jaguar
+lion
+tiger
+cheetah
+brown bear
+American black bear
+ice bear
+sloth bear
+mongoose
+meerkat
+tiger beetle
+ladybug
+ground beetle
+long-horned beetle
+leaf beetle
+dung beetle
+rhinoceros beetle
+weevil
+fly
+bee
+ant
+grasshopper
+cricket
+walking stick
+cockroach
+mantis
+cicada
+leafhopper
+lacewing
+dragonfly
+damselfly
+admiral
+ringlet
+monarch
+cabbage butterfly
+sulphur butterfly
+lycaenid
+starfish
+sea urchin
+sea cucumber
+wood rabbit
+hare
+Angora
+hamster
+porcupine
+fox squirrel
+marmot
+beaver
+guinea pig
+sorrel
+zebra
+hog
+wild boar
+warthog
+hippopotamus
+ox
+water buffalo
+bison
+ram
+bighorn
+ibex
+hartebeest
+impala
+gazelle
+Arabian camel
+llama
+weasel
+mink
+polecat
+black-footed ferret
+otter
+skunk
+badger
+armadillo
+three-toed sloth
+orangutan
+gorilla
+chimpanzee
+gibbon
+siamang
+guenon
+patas
+baboon
+macaque
+langur
+colobus
+proboscis monkey
+marmoset
+capuchin
+howler monkey
+titi
+spider monkey
+squirrel monkey
+Madagascar cat
+indri
+Indian elephant
+African elephant
+lesser panda
+giant panda
+barracouta
+eel
+coho
+rock beauty
+anemone fish
+sturgeon
+gar
+lionfish
+puffer
+abacus
+abaya
+academic gown
+accordion
+acoustic guitar
+aircraft carrier
+airliner
+airship
+altar
+ambulance
+amphibian
+analog clock
+apiary
+apron
+ashcan
+assault rifle
+backpack
+bakery
+balance beam
+balloon
+ballpoint
+Band Aid
+banjo
+bannister
+barbell
+barber chair
+barbershop
+barn
+barometer
+barrel
+barrow
+baseball
+basketball
+bassinet
+bassoon
+bathing cap
+bath towel
+bathtub
+beach wagon
+beacon
+beaker
+bearskin
+beer bottle
+beer glass
+bell cote
+bib
+bicycle-built-for-two
+bikini
+binder
+binoculars
+birdhouse
+boathouse
+bobsled
+bolo tie
+bonnet
+bookcase
+bookshop
+bottlecap
+bow
+bow tie
+brass
+brassiere
+breakwater
+breastplate
+broom
+bucket
+buckle
+bulletproof vest
+bullet train
+butcher shop
+cab
+caldron
+candle
+cannon
+canoe
+can opener
+cardigan
+car mirror
+carousel
+carpenter's kit
+carton
+car wheel
+cash machine
+cassette
+cassette player
+castle
+catamaran
+CD player
+cello
+cellular telephone
+chain
+chainlink fence
+chain mail
+chain saw
+chest
+chiffonier
+chime
+china cabinet
+Christmas stocking
+church
+cinema
+cleaver
+cliff dwelling
+cloak
+clog
+cocktail shaker
+coffee mug
+coffeepot
+coil
+combination lock
+computer keyboard
+confectionery
+container ship
+convertible
+corkscrew
+cornet
+cowboy boot
+cowboy hat
+cradle
+crane
+crash helmet
+crate
+crib
+Crock Pot
+croquet ball
+crutch
+cuirass
+dam
+desk
+desktop computer
+dial telephone
+diaper
+digital clock
+digital watch
+dining table
+dishrag
+dishwasher
+disk brake
+dock
+dogsled
+dome
+doormat
+drilling platform
+drum
+drumstick
+dumbbell
+Dutch oven
+electric fan
+electric guitar
+electric locomotive
+entertainment center
+envelope
+espresso maker
+face powder
+feather boa
+file
+fireboat
+fire engine
+fire screen
+flagpole
+flute
+folding chair
+football helmet
+forklift
+fountain
+fountain pen
+four-poster
+freight car
+French horn
+frying pan
+fur coat
+garbage truck
+gasmask
+gas pump
+goblet
+go-kart
+golf ball
+golfcart
+gondola
+gong
+gown
+grand piano
+greenhouse
+grille
+grocery store
+guillotine
+hair slide
+hair spray
+half track
+hammer
+hamper
+hand blower
+hand-held computer
+handkerchief
+hard disc
+harmonica
+harp
+harvester
+hatchet
+holster
+home theater
+honeycomb
+hook
+hoopskirt
+horizontal bar
+horse cart
+hourglass
+iPod
+iron
+jack-o'-lantern
+jean
+jeep
+jersey
+jigsaw puzzle
+jinrikisha
+joystick
+kimono
+knee pad
+knot
+lab coat
+ladle
+lampshade
+laptop
+lawn mower
+lens cap
+letter opener
+library
+lifeboat
+lighter
+limousine
+liner
+lipstick
+Loafer
+lotion
+loudspeaker
+loupe
+lumbermill
+magnetic compass
+mailbag
+mailbox
+maillot
+maillot
+manhole cover
+maraca
+marimba
+mask
+matchstick
+maypole
+maze
+measuring cup
+medicine chest
+megalith
+microphone
+microwave
+military uniform
+milk can
+minibus
+miniskirt
+minivan
+missile
+mitten
+mixing bowl
+mobile home
+Model T
+modem
+monastery
+monitor
+moped
+mortar
+mortarboard
+mosque
+mosquito net
+motor scooter
+mountain bike
+mountain tent
+mouse
+mousetrap
+moving van
+muzzle
+nail
+neck brace
+necklace
+nipple
+notebook
+obelisk
+oboe
+ocarina
+odometer
+oil filter
+organ
+oscilloscope
+overskirt
+oxcart
+oxygen mask
+packet
+paddle
+paddlewheel
+padlock
+paintbrush
+pajama
+palace
+panpipe
+paper towel
+parachute
+parallel bars
+park bench
+parking meter
+passenger car
+patio
+pay-phone
+pedestal
+pencil box
+pencil sharpener
+perfume
+Petri dish
+photocopier
+pick
+pickelhaube
+picket fence
+pickup
+pier
+piggy bank
+pill bottle
+pillow
+ping-pong ball
+pinwheel
+pirate
+pitcher
+plane
+planetarium
+plastic bag
+plate rack
+plow
+plunger
+Polaroid camera
+pole
+police van
+poncho
+pool table
+pop bottle
+pot
+potter's wheel
+power drill
+prayer rug
+printer
+prison
+projectile
+projector
+puck
+punching bag
+purse
+quill
+quilt
+racer
+racket
+radiator
+radio
+radio telescope
+rain barrel
+recreational vehicle
+reel
+reflex camera
+refrigerator
+remote control
+restaurant
+revolver
+rifle
+rocking chair
+rotisserie
+rubber eraser
+rugby ball
+rule
+running shoe
+safe
+safety pin
+saltshaker
+sandal
+sarong
+sax
+scabbard
+scale
+school bus
+schooner
+scoreboard
+screen
+screw
+screwdriver
+seat belt
+sewing machine
+shield
+shoe shop
+shoji
+shopping basket
+shopping cart
+shovel
+shower cap
+shower curtain
+ski
+ski mask
+sleeping bag
+slide rule
+sliding door
+slot
+snorkel
+snowmobile
+snowplow
+soap dispenser
+soccer ball
+sock
+solar dish
+sombrero
+soup bowl
+space bar
+space heater
+space shuttle
+spatula
+speedboat
+spider web
+spindle
+sports car
+spotlight
+stage
+steam locomotive
+steel arch bridge
+steel drum
+stethoscope
+stole
+stone wall
+stopwatch
+stove
+strainer
+streetcar
+stretcher
+studio couch
+stupa
+submarine
+suit
+sundial
+sunglass
+sunglasses
+sunscreen
+suspension bridge
+swab
+sweatshirt
+swimming trunks
+swing
+switch
+syringe
+table lamp
+tank
+tape player
+teapot
+teddy
+television
+tennis ball
+thatch
+theater curtain
+thimble
+thresher
+throne
+tile roof
+toaster
+tobacco shop
+toilet seat
+torch
+totem pole
+tow truck
+toyshop
+tractor
+trailer truck
+tray
+trench coat
+tricycle
+trimaran
+tripod
+triumphal arch
+trolleybus
+trombone
+tub
+turnstile
+typewriter keyboard
+umbrella
+unicycle
+upright
+vacuum
+vase
+vault
+velvet
+vending machine
+vestment
+viaduct
+violin
+volleyball
+waffle iron
+wall clock
+wallet
+wardrobe
+warplane
+washbasin
+washer
+water bottle
+water jug
+water tower
+whiskey jug
+whistle
+wig
+window screen
+window shade
+Windsor tie
+wine bottle
+wing
+wok
+wooden spoon
+wool
+worm fence
+wreck
+yawl
+yurt
+web site
+comic book
+crossword puzzle
+street sign
+traffic light
+book jacket
+menu
+plate
+guacamole
+consomme
+hot pot
+trifle
+ice cream
+ice lolly
+French loaf
+bagel
+pretzel
+cheeseburger
+hotdog
+mashed potato
+head cabbage
+broccoli
+cauliflower
+zucchini
+spaghetti squash
+acorn squash
+butternut squash
+cucumber
+artichoke
+bell pepper
+cardoon
+mushroom
+Granny Smith
+strawberry
+orange
+lemon
+fig
+pineapple
+banana
+jackfruit
+custard apple
+pomegranate
+hay
+carbonara
+chocolate sauce
+dough
+meat loaf
+pizza
+potpie
+burrito
+red wine
+espresso
+cup
+eggnog
+alp
+bubble
+cliff
+coral reef
+geyser
+lakeside
+promontory
+sandbar
+seashore
+valley
+volcano
+ballplayer
+groom
+scuba diver
+rapeseed
+daisy
+yellow lady's slipper
+corn
+acorn
+hip
+buckeye
+coral fungus
+agaric
+gyromitra
+stinkhorn
+earthstar
+hen-of-the-woods
+bolete
+ear
+toilet tissue
\ No newline at end of file
diff --git a/imageai/Detection/Custom/CUSTOMDETECTION.md b/imageai/Detection/Custom/CUSTOMDETECTION.md
new file mode 100644
index 00000000..d8b279ea
--- /dev/null
+++ b/imageai/Detection/Custom/CUSTOMDETECTION.md
@@ -0,0 +1,208 @@
+# ImageAI : Custom Object Detection
+
+### TABLE OF CONTENTS
+
+- :white_square_button: Custom Object Detection
+- :white_square_button: Object Detection, Extraction and Fine-tune
+- :white_square_button: Hiding/Showing Object Name and Probability
+- :white_square_button: Image Input & Output Types
+- :white_square_button: Documentation
+
+
+ImageAI provides very convenient and powerful methods to perform object detection on images and extract each object from the image using your own **custom YOLOv3 or TinyYOLOv3 model** and the corresponding **.json** generated during the training. To test the custom object detection, you can download a sample custom model we have trained to detect the Hololens headset and its **.json** file via the links below:
+
+* [**yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt) _(Size = 236 mb)_
+* [**hololens-yolo_yolov3_detection_config.json**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json)
+
+
+ Once you download the custom object detection model file, you should copy the model file to the your project folder where your **.py** files will be.
+ Then create a python file and give it a name; an example is FirstCustomDetection.py. Then write the code below into the python file:
+
+### FirstCustomDetection.py
+
+
+```python
+from imageai.Detection.Custom import CustomObjectDetection
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
+detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
+detector.loadModel()
+detections = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
+for detection in detections:
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+
+```
+
+Sample Result - Input:
+
+
+
+ Output:
+
+
+
+```
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+
+```
+
+
+Let us make a breakdown of the object detection code that we used above.
+
+```python
+from imageai.Detection.Custom import CustomObjectDetection
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+```
+ In the 3 lines above, we imported the **ImageAI custom object detection** class in the first line, created the class instance on the second line and set the model type to YOLOv3.
+
+```python
+detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
+detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
+detector.loadModel()
+```
+
+ In the 3 lines above, we specified the file path to our downloaded model file in the first line, specified the path to our **hololens-yolo_yolov3_detection_config.json** file in the second line and loaded the model on the third line.
+
+```python
+detections = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
+for detection in detections:
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+
+```
+
+In the 3 lines above, we ran the `detectObjectsFromImage()` function and parse in the path to our test image, and the path to the new
+ image which the function will save. Then the function returns an array of dictionaries with each dictionary corresponding
+ to the number of objects detected in the image. Each dictionary has the properties `name` (name of the object),
+`percentage_probability` (percentage probability of the detection) and `box_points` (the x1,y1,x2 and y2 coordinates of the bounding box of the object).
+
+
+
+
+### Object Detection, Extraction and Fine-tune
+
+
+In the examples we used above, we ran the object detection on an image and it
+returned the detected objects in an array as well as save a new image with rectangular markers drawn on each object. In our next examples, we will be able to extract each object from the input image and save it independently.
+
+
+
+In the example code below which is very identical to the previous object detection code, we will save each object detected as a separate image.
+
+```python
+from imageai.Detection.Custom import CustomObjectDetection
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
+detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
+detector.loadModel()
+detections, extracted_objects_array = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg", extract_detected_objects=True)
+
+for detection, object_path in zip(detections, extracted_objects_array):
+ print(object_path)
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+ print("---------------")
+```
+
+
+Sample Result: Output Images
+
+
+
+
+
+
+
+
+
+
+
+
+Let us review the part of the code that perform the object detection and extract the images:
+
+```python
+detections, extracted_objects_array = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg", extract_detected_objects=True)
+
+for detection, object_path in zip(detections, extracted_objects_array):
+ print(object_path)
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+ print("---------------")
+```
+
+In the above lines, we called the `detectObjectsFromImage()`, parsed in the input image path, output image path, and an
+extra parameter `extract_detected_objects=True`. This parameter states that the function should extract each object detected from the image
+and save it as a separate image. The parameter is false by default. Once set to `true`, the function will create a directory
+ which is the `output image path + "-objects"`. Then it saves all the extracted images into this new directory with
+ each image's name being the `detected object name + "-" + a number` which corresponds to the order at which the objects
+ were detected.
+
+This new parameter we set to extract and save detected objects as an image will make the function to return 2 values. The
+ first is the array of dictionaries with each dictionary corresponding to a detected object. The second is an array of the paths
+ to the saved images of each object detected and extracted, and they are arranged in order at which the objects are in the
+ first array.
+
+
+
+### And one important feature you need to know!
+
+You will recall that the percentage probability
+ for each detected object is sent back by the `detectObjectsFromImage()` function. The function has a parameter
+ `minimum_percentage_probability`, whose default value is `30` (value ranges between 0 - 100), and it is set to 30 in this example. That means the function will only return a detected
+ object if its percentage probability is **30 or above**. The value was kept at this number to ensure the integrity of the
+ detection results. You can fine-tune the object
+ detection by setting `minimum_percentage_probability` to a smaller value to detect more objects or a higher value to detect fewer objects.
+
+
+
+
+### Hiding/Showing Object Name and Probability
+
+
+**ImageAI** provides options to hide the name of objects detected and/or the percentage probability from being shown on the saved/returned detected image. Using the `detectObjectsFromImage()` and `detectCustomObjectsFromImage()` functions, the parameters `'display_object_name'` and `'display_percentage_probability'` can be set to True or False individually. Take a look at the code below:
+```python
+detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "holo2.jpg"), output_image_path=os.path.join(execution_path , "holo2_nodetails.jpg"), minimum_percentage_probability=30, display_percentage_probability=False, display_object_name=False)
+```
+
+In the above code, we specified that both the object name and percentage probability should not be shown. As you can see in the result below, both the names of the objects and their individual percentage probability is not shown in the detected image.
+
+**Result**
+
+
+
+
+### Image Input & Output Types
+
+
+**ImageAI** custom object detection supports 2 types of inputs which are **file path to image file** (default) and **numpy array of an image**
+as well as 2 types of output which are image **file** (default) and numpy **array**.
+This means you can now perform object detection in production applications such as on a web server and system
+ that returns file in any of the above stated formats.
+ To perform object detection with numpy array input, you just need to state the input type
+in the `.detectObjectsFromImage()` function. See example below.
+
+```python
+detections = detector.detectObjectsFromImage(input_type="array", input_image=image_array , output_image_path=os.path.join(execution_path , "holo2-detected.jpg")) # For numpy array input type
+```
+To perform object detection with numpy array output you just need to state the output type
+in the `.detectObjectsFromImage()` function. See example below.
+
+```python
+detected_image_array, detections = detector.detectObjectsFromImage(output_type="array", input_image="holo2.jpg" ) # For numpy array output type
+```
+
+
+
+### Documentation
+
+
+We have provided full documentation for all **ImageAI** classes and functions. Find links below:
+
+* Documentation - **English Version** [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
\ No newline at end of file
diff --git a/imageai/Detection/Custom/CUSTOMDETECTIONTRAINING.md b/imageai/Detection/Custom/CUSTOMDETECTIONTRAINING.md
new file mode 100644
index 00000000..63da9635
--- /dev/null
+++ b/imageai/Detection/Custom/CUSTOMDETECTIONTRAINING.md
@@ -0,0 +1,224 @@
+# ImageAI : Custom Detection Model Training
+
+## ---------------------------------------------------
+## Introducing Jarvis and TheiaEngine.
+
+We the creators of ImageAI are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers.
+
+
+[](https://jarvis.genxr.co)
+
+Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.
+
+
+Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.
+
+
+[](https://www.genxr.co/theia-engine)
+
+
+[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
+- **Detect 300+ objects** ( 220 more objects than ImageAI)
+- **Provide answers to any content or context questions** asked on an image
+ - very useful to get information on any object, action or information without needing to train a new custom model for every tasks
+- **Generate scene description and summary**
+- **Convert 2D image to 3D pointcloud and triangular mesh**
+- **Semantic Scene mapping of objects, walls, floors, etc**
+- **Stateless Face recognition and emotion detection**
+- **Image generation and augmentation from prompt**
+- etc.
+
+Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
+## ---------------------------------------------------
+
+**ImageAI** provides the most simple and powerful approach to training custom object detection models
+using the YOLOv3 architecture,
+which you can load into the `imageai.Detection.Custom.CustomObjectDetection` class. This allows
+ you to train your own **YOLOv3** or **TinyYOLOv3** model on any set of images that corresponds to any type of objects of interest.
+The training process generates a JSON file that maps the objects names in your image dataset and the detection anchors, as well as creates lots of models. In choosing the best model for your custom object detection task, an `evaluateModel()` function has been provided to compute the **mAP** of your saved models by allowing you to state your desired **IoU** and **Non-maximum Suppression** values. Then you can perform custom
+object detection using the model and the JSON file generated.
+
+### TABLE OF CONTENTS
+- :white_square_button: Preparing your custom dataset
+- :white_square_button: Training on your custom Dataset
+- :white_square_button: Evaluating your saved detection models' mAP
+
+
+### Preparing your custom dataset
+
+
+To train a custom detection model, you need to prepare the images you want to use to train the model.
+You will prepare the images as follows:
+
+1. Decide the type of object(s) you want to detect and collect about **200 (minimum recommendation)** or more picture of each of the object(s)
+2. Once you have collected the images, you need to annotate the object(s) in the images. **ImageAI** uses the **YOLO** for image annotation. You can generate this annotation for your images using the easy to use [**LabelImg**](https://github.com/tzutalin/labelImg) image annotation tool, available for Windows, Linux and MacOS systems. Open the link below to install the annotation tool. See: [https://github.com/tzutalin/labelImg](https://github.com/tzutalin/labelImg)
+3. When you are done annotating your images, **annotation .txt** files will be generated for each image in your dataset. The **annotation .txt** file describes each or **all** of the objects in the image. For example, if your image names are **image(1).jpg**, **image(2).jpg**, **image(3).jpg** till **image(z).jpg**; the corresponding annotation for each of the images will be **image(1).txt**, **image(2).txt**, **image(3).txt** till **image(z).txt**.
+4. Once you have the annotations for all your images, create a folder for your dataset (E.g headsets) and in this parent folder, create child folders **train** and **validation**
+5. In the train folder, create **images** and **annotations**
+ sub-folders. Put about 70-80% of your dataset of each object's images in the **images** folder and put the corresponding annotations for these images in the **annotations** folder.
+6. In the validation folder, create **images** and **annotations** sub-folders. Put the rest of your dataset images in the **images** folder and put the corresponding annotations for these images in the **annotations** folder.
+7. Once you have done this, the structure of your image dataset folder should look like below:
+ ```
+ >> train >> images >> img_1.jpg (shows Object_1)
+ >> images >> img_2.jpg (shows Object_2)
+ >> images >> img_3.jpg (shows Object_1, Object_3 and Object_n)
+ >> annotations >> img_1.txt (describes Object_1)
+ >> annotations >> img_2.txt (describes Object_2)
+ >> annotations >> img_3.txt (describes Object_1, Object_3 and Object_n)
+
+ >> validation >> images >> img_151.jpg (shows Object_1, Object_3 and Object_n)
+ >> images >> img_152.jpg (shows Object_2)
+ >> images >> img_153.jpg (shows Object_1)
+ >> annotations >> img_151.txt (describes Object_1, Object_3 and Object_n)
+ >> annotations >> img_152.txt (describes Object_2)
+ >> annotations >> img_153.txt (describes Object_1)
+ ```
+8. You can train your custom detection model completely from scratch or use transfer learning (recommended for better accuracy) from a pre-trained YOLOv3 model. Also, we have provided a sample annotated Hololens and Headsets (Hololens and Oculus) dataset for you to train with. Download the pre-trained YOLOv3 model and the sample datasets in the link below.
+
+Download dataset `hololens-yolo.zip` [here](https://github.com/OlafenwaMoses/ImageAI/releases/tag/test-resources-v3) and pre-trained model `yolov3.pt` [here](https://github.com/OlafenwaMoses/ImageAI/releases/tag/3.0.0-pretrained)
+
+
+### Training on your custom dataset
+
+
+Before you start training your custom detection model, kindly take note of the following:
+
+- The default **batch_size** is 4. If you are training with **Google Colab**, this will be fine. However, I will advice you use a more powerful GPU than the K80 offered by Colab as the higher your **batch_size (8, 16)**, the better the accuracy of your detection model.
+
+Then your training code goes as follows:
+```python
+from imageai.Detection.Custom import DetectionModelTrainer
+
+trainer = DetectionModelTrainer()
+trainer.setModelTypeAsYOLOv3()
+trainer.setDataDirectory(data_directory="hololens-yolo")
+trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="yolov3.pt")
+# In the above,when training for detecting multiple objects,
+#set object_names_array=["object1", "object2", "object3",..."objectz"]
+trainer.trainModel()
+```
+
+ Yes! Just 6 lines of code and you can train object detection models on your custom dataset.
+Now lets take a look at how the code above works.
+
+```python
+from imageai.Detection.Custom import DetectionModelTrainer
+
+trainer = DetectionModelTrainer()
+trainer.setModelTypeAsYOLOv3()
+trainer.setDataDirectory(data_directory="hololens-yolo")
+```
+
+In the first line, we import the **ImageAI** detection model training class, then we define the model trainer in the second line,
+ we set the network type in the third line and set the path to the image dataset we want to train the network on.
+
+```python
+trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="yolov3.pt")
+```
+
+
+In the line above, we configured our detection model trainer. The parameters we stated in the function are as below:
+
+- **object_names_array** : this is an array containing the names of the objects in our dataset
+- **batch_size** : this is to state the batch size for the training
+- **num_experiments** : this is to state the number of times the network will train over all the training images,
+ which is also called epochs
+- **train_from_pretrained_model(optional)** : this is to train using transfer learning from a pre-trained **YOLOv3** model
+
+```python
+trainer.trainModel()
+```
+
+
+When you start the training, you should see something like this in the console:
+```
+Generating anchor boxes for training images...
+thr=0.25: 1.0000 best possible recall, 6.93 anchors past thr
+n=9, img_size=416, metric_all=0.463/0.856-mean/best, past_thr=0.549-mean:
+====================
+Pretrained YOLOv3 model loaded to initialize weights
+====================
+Epoch 1/100
+----------
+Train:
+30it [00:14, 2.09it/s]
+ box loss-> 0.09820, object loss-> 0.27985, class loss-> 0.00000
+Validation:
+15it [01:45, 7.05s/it]
+ recall: 0.085714 precision: 0.000364 mAP@0.5: 0.000186, mAP@0.5-0.95: 0.000030
+
+Epoch 2/100
+----------
+Train:
+30it [00:07, 4.25it/s]
+ box loss-> 0.08691, object loss-> 0.07011, class loss-> 0.00000
+Validation:
+15it [01:37, 6.53s/it]
+ recall: 0.214286 precision: 0.000854 mAP@0.5: 0.000516, mAP@0.5-0.95: 0.000111
+.
+.
+.
+.
+
+```
+
+Let us explain the details shown above:
+```
+Generating anchor boxes for training images...
+thr=0.25: 1.0000 best possible recall, 6.93 anchors past thr
+n=9, img_size=416, metric_all=0.463/0.856-mean/best, past_thr=0.549-mean:
+====================
+Pretrained YOLOv3 model loaded to initialize weights
+====================
+```
+
+The above details signifies the following:
+- **ImageAI** autogenerates the best match detection **anchor boxes** for your image dataset.
+
+- The pretrained **yolov3.pt** was loaded to initialize the weights used to train the model.
+
+```
+Epoch 1/100
+----------
+Train:
+30it [00:14, 2.09it/s]
+ box loss-> 0.09820, object loss-> 0.27985, class loss-> 0.00000
+Validation:
+15it [01:45, 7.05s/it]
+ recall: 0.085714 precision: 0.000364 mAP@0.5: 0.000186, mAP@0.5-0.95: 0.000030
+
+Epoch 2/100
+----------
+Train:
+30it [00:07, 4.25it/s]
+ box loss-> 0.08691, object loss-> 0.07011, class loss-> 0.00000
+Validation:
+15it [01:37, 6.53s/it]
+ recall: 0.214286 precision: 0.000854 mAP@0.5: 0.000516, mAP@0.5-0.95: 0.000111
+```
+
+- The above signifies the progress of the training.
+- For each experiment (Epoch), a number of metrics are computed. The important ones for choosing an accurate model are detailed below
+ - The bounding box loss `box loss` is reported and expected to drop as the training progresses
+ - The object localization loss `object loss` is reported and expected to drop as the training progresses
+ - The class loss `class loss` is reported and expected to drop as the training progresses. If the class loss persists at 0.0000, it's because your dataset has a single class.
+  - The `mAP50` and `mAP0.5-0.95` metrics are expected to increase. This signifies that the model's accuracy increases. There might be fluctuations in these metrics sometimes.
+- For each increase in the `mAP50` after an experiment, a model is saved in the **hololens-yolo/models** folder. The higher the mAP50, the better the model.
+
+Once you are done training, you can visit the link below for performing object detection with your **custom detection model** and **detection_config.json** file.
+
+[Detection/Custom/CUSTOMDETECTION.md](./CUSTOMDETECTION.md)
+
+
+### >> Documentation
+
+
+We have provided full documentation for all **ImageAI** classes and functions. Find links below:
+
+* Documentation - **English Version** [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
+
+
+
+
+
+
diff --git a/imageai/Detection/Custom/CUSTOMVIDEODETECTION.md b/imageai/Detection/Custom/CUSTOMVIDEODETECTION.md
new file mode 100644
index 00000000..0c0e9b03
--- /dev/null
+++ b/imageai/Detection/Custom/CUSTOMVIDEODETECTION.md
@@ -0,0 +1,246 @@
+# ImageAI : Custom Video Object Detection, Tracking and Analysis
+
+### TABLE OF CONTENTS
+
+- :white_square_button: First Custom Video Object Detection
+- :white_square_button: Camera / Live Stream Video Detection
+- :white_square_button: Video Analysis
+- :white_square_button: Hiding/Showing Object Name and Probability
+- :white_square_button: Frame Detection Intervals
+- :white_square_button: Video Detection Timeout (NEW)
+- :white_square_button: Documentation
+
+
+ImageAI provides convenient, flexible and powerful methods to perform object detection on videos using your own **custom YOLOv3 model** and the corresponding **.json** file generated during the training. This version of **ImageAI** provides commercial grade video objects detection features, which include but not limited to device/IP camera inputs, per frame, per second, per minute and entire video analysis for storing in databases and/or real-time visualizations and for future insights.
+To test the custom video object detection, you can download a sample custom model we have trained to detect the Hololens headset and its **.json** file via the links below:
+
+* [**yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt) _(Size = 236 mb)_
+* [**hololens-yolo_yolov3_detection_config.json**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json)
+
+
+Because video object detection is a compute intensive task, we advise you perform this experiment using a computer with an NVIDIA GPU and the GPU version of PyTorch
+ installed. Performing Video Object Detection on CPU will be slower than using an NVIDIA GPU powered computer. You can use Google Colab for this
+ experiment as it has an NVIDIA K80 GPU available for free.
+
+ Once you download the custom object detection model and JSON files, you should copy the model and the JSON files to the your project folder where your .py files will be.
+ Then create a python file and give it a name; an example is FirstCustomVideoObjectDetection.py. Then write the code below into the python file:
+
+
+### FirstCustomVideoObjectDetection.py
+
+
+```python
+from imageai.Detection.Custom import CustomVideoObjectDetection
+import os
+
+execution_path = os.getcwd()
+
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
+video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(input_file_path="holo1.mp4",
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20,
+ minimum_percentage_probability=40,
+ log_progress=True)
+```
+
+[**Input Video**](../../../data-videos/holo1.mp4)
+[](../../../data-videos/holo1.mp4)
+[**Output Video**](https://www.youtube.com/watch?v=4o5GyAR4Mpw)
+[](https://www.youtube.com/watch?v=4o5GyAR4Mpw)
+
+
+
+Let us make a breakdown of the object detection code that we used above.
+
+```python
+from imageai.Detection.Custom import CustomVideoObjectDetection
+import os
+
+execution_path = os.getcwd()
+```
+
+In the 3 lines above , we import the **ImageAI custom video object detection** class in the first line, import the **os** in the second line and obtained
+ the path to folder where our python file runs.
+```python
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
+video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
+video_detector.loadModel()
+```
+In the 4 lines above, we created a new instance of the `CustomVideoObjectDetection` class in the first line, set the model type to YOLOv3 in the second line,
+ set the model path to our custom YOLOv3 model file in the third line, specified the path to the model's corresponding **hololens-yolo_yolov3_detection_config.json** in the fourth line and load the model in the fifth line.
+
+```python
+video_detector.detectObjectsFromVideo(input_file_path="holo1.mp4",
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20,
+ minimum_percentage_probability=40,
+ log_progress=True)
+```
+
+In the code above, we ran the `detectObjectsFromVideo()` function and parsed in the path to our video, the path to the new
+ video (without the extension, it saves a .mp4 video by default) which the function will save, the number of frames per second (fps) that
+ we desire the output video to have and the option to log the progress of the detection in the console. Then the function returns the path to the saved video
+ which contains boxes and percentage probabilities rendered on objects detected in the video.
+
+
+### Camera / Live Stream Video Detection
+
+
+**ImageAI** now allows live-video detection with support for camera inputs. Using **OpenCV**'s **VideoCapture()** function, you can load live-video streams from a device camera, cameras connected by cable or IP cameras, and parse it into **ImageAI**'s **detectObjectsFromVideo()** function. All features that are supported for detecting objects in a video file is also available for detecting objects in a camera's live-video feed. Find below an example of detecting live-video feed from the device camera.
+
+```python
+from imageai.Detection.Custom import CustomVideoObjectDetection
+import os
+import cv2
+
+execution_path = os.getcwd()
+camera = cv2.VideoCapture(0)
+
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
+video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20,
+ minimum_percentage_probability=40,
+ log_progress=True)
+```
+
+The difference in the code above and the code for the detection of a video file is that we defined an **OpenCV VideoCapture** instance and loaded the default device camera into it. Then we parsed the camera we defined into the parameter **camera_input** which replaces the **input_file_path** that is used for video file.
+
+
+### Video Analysis
+
+
+**ImageAI** now provide commercial-grade video analysis in the Custom Video Object Detection class, for both video file inputs and camera inputs. This feature allows developers to obtain deep insights into any video processed with **ImageAI**. This insights can be visualized in real-time, stored in a NoSQL database for future review or analysis.
+
+For video analysis, the **detectObjectsFromVideo()** now allows you to state your own defined functions which will be executed for every frame, second and/or minute of the video detected, as well as state a function that will be executed at the end of a video detection. Once these functions are stated, they will receive raw but comprehensive analytical data on the index of the frame/second/minute, objects detected (name, percentage_probability and box_points), number of instances of each unique object detected and average number of occurrences of each unique object detected over a second/minute and entire video.
+
+To obtain the video analysis, all you need to do is specify a function, state the corresponding parameters it will be receiving and parse the function name into the **per_frame_function**, **per_second_function**, **per_minute_function** and **video_complete_function** parameters in the detection function. Find below examples of video analysis functions.
+
+```python
+def forFrame(frame_number, output_array, output_count):
+ print("FOR FRAME " , frame_number)
+ print("Output for each object : ", output_array)
+ print("Output count for unique objects : ", output_count)
+ print("------------END OF A FRAME --------------")
+
+def forSeconds(second_number, output_arrays, count_arrays, average_output_count):
+ print("SECOND : ", second_number)
+ print("Array for the outputs of each frame ", output_arrays)
+ print("Array for output count for unique objects in each frame : ", count_arrays)
+ print("Output average count for unique objects in the last second: ", average_output_count)
+ print("------------END OF A SECOND --------------")
+
+def forMinute(minute_number, output_arrays, count_arrays, average_output_count):
+ print("MINUTE : ", minute_number)
+ print("Array for the outputs of each frame ", output_arrays)
+ print("Array for output count for unique objects in each frame : ", count_arrays)
+ print("Output average count for unique objects in the last minute: ", average_output_count)
+ print("------------END OF A MINUTE --------------")
+
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
+video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20, per_second_function=forSeconds, per_frame_function = forFrame, per_minute_function= forMinute,
+ minimum_percentage_probability=40,
+ log_progress=True)
+```
+
+
+**ImageAI** also allows you to obtain complete analysis of the entire video processed. All you need is to define a function like the forSecond or forMinute function and set the **video_complete_function** parameter into your **.detectObjectsFromVideo()** function. The same values for the per_second-function and per_minute_function will be returned. The difference is that no index will be returned and the other 3 values will be returned, and the 3 values will cover all frames in the video. Below is a sample function:
+```python
+def forFull(output_arrays, count_arrays, average_output_count):
+ #Perform action on the 3 parameters returned into the function
+
+
+video_detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ video_complete_function=forFull,
+ minimum_percentage_probability=40,
+ log_progress=True)
+
+```
+
+**FINAL NOTE ON VIDEO ANALYSIS** : **ImageAI** allows you to obtain the detected video frame as a Numpy array at each frame, second and minute function. All you need to do is specify one more parameter in your function and set **return_detected_frame=True** in your **detectObjectsFromVideo()** function. Once this is set, the extra parameter you specified in your function will be the Numpy array of the detected frame. See a sample below:
+
+```python
+def forFrame(frame_number, output_array, output_count, detected_frame):
+ print("FOR FRAME " , frame_number)
+ print("Output for each object : ", output_array)
+ print("Output count for unique objects : ", output_count)
+ print("Returned Objects is : ", type(detected_frame))
+ print("------------END OF A FRAME --------------")
+
+
+video_detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ per_frame_function=forFrame,
+ minimum_percentage_probability=40,
+ log_progress=True, return_detected_frame=True)
+```
+
+
+### Frame Detection Intervals
+
+
+The above video objects detection task are optimized for frame-real-time object detections that ensures that objects in every frame
+of the video is detected. **ImageAI** provides you the option to adjust the video frame detections which can speed up
+your video detection process. When calling the `.detectObjectsFromVideo()`, you can
+specify at which frame interval detections should be made. By setting the **frame_detection_interval** parameter to be
+ equal to 5 or 20, that means the object detections in the video will be updated after 5 frames or 20 frames.
+If your output video **frames_per_second** is set to 20, that means the object detections in the video will
+ be updated once in every quarter of a second or every second. This is useful in case scenarios where the available
+ compute is less powerful and speeds of moving objects are low. This ensures you can have objects detected as second-real-time
+, half-a-second-real-time or whichever way suits your needs.
+
+
+### Custom Video Detection Timeout
+
+
+**ImageAI** now allows you to set a timeout in seconds for detection of objects in videos or camera live feed. To set a timeout for your video detection code, all you need to do is specify the `detection_timeout` parameter in the `detectObjectsFromVideo()` function to the number of desired seconds. In the example code below, we set `detection_timeout` to 120 seconds (2 minutes).
+
+
+```python
+from imageai.Detection.Custom import CustomVideoObjectDetection
+import os
+import cv2
+
+execution_path = os.getcwd()
+camera = cv2.VideoCapture(0)
+
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
+video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20, minimum_percentage_probability=40,
+ detection_timeout=120)
+```
+
+
+### >> Documentation
+
+
+We have provided full documentation for all **ImageAI** classes and functions. Find links below:
+
+* Documentation - **English Version** [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
+
diff --git a/imageai/Detection/Custom/__init__.py b/imageai/Detection/Custom/__init__.py
new file mode 100644
index 00000000..e40c711d
--- /dev/null
+++ b/imageai/Detection/Custom/__init__.py
@@ -0,0 +1,947 @@
+import os
+import time
+import math
+import json
+import warnings
+from typing import List, Union, Tuple, Dict
+from collections import defaultdict
+
+import numpy as np
+from PIL import Image
+import cv2
+import torch
+from torch.cuda import amp
+from torch.utils.data import DataLoader
+from torch.optim import SGD, lr_scheduler
+from tqdm import tqdm
+
+from .yolo.dataset import LoadImagesAndLabels
+from .yolo.custom_anchors import generate_anchors
+from .yolo.compute_loss import compute_loss
+from .yolo import validate
+from ...yolov3.tiny_yolov3 import YoloV3Tiny
+from ...yolov3.yolov3 import YoloV3
+from ...yolov3.utils import draw_bbox_and_label, get_predictions, prepare_image
+
+from ...backend_check.model_extension import extension_check
+
+
+class DetectionModelTrainer:
+ """
+ This is the Detection Model training class, which allows you to train object detection models
+ on image datasets that are in YOLO format, using the YOLOv3.
+ """
+
+    def __init__(self) -> None:
+        # Train on GPU when one is available; otherwise fall back to CPU.
+        self.__device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.__cuda = (self.__device != "cpu")
+        # Network variant: "yolov3" or "tiny-yolov3"; set via setModelTypeAs*().
+        self.__model_type = ""
+        self.__model = None
+        self.__optimizer = None
+        # Root of the YOLO-format dataset; set via setDataDirectory().
+        self.__data_dir = ""
+        # Object class names, in the index order used by the annotations.
+        self.__classes: List[str] = None
+        self.__num_classes = None
+        # Anchor boxes auto-generated from the training set (see __set_training_param).
+        self.__anchors = None
+        self.__dataset_name = None
+        self.__mini_batch_size: int = None
+        # Mixed-precision gradient scaler; only enabled when training on CUDA.
+        self.__scaler = amp.GradScaler(enabled=self.__cuda)
+        self.__lr_lambda = None
+        self.__custom_train_dataset = None
+        self.__custom_val_dataset = None
+        self.__train_loader = None
+        self.__val_loader = None
+
+        # Optional pre-trained weights for transfer learning; set via setTrainConfig().
+        self.__model_path: str = None
+        self.__epochs: int = None
+        # Output folders ("models" and "json") created inside the dataset directory.
+        self.__output_models_dir: str = None
+        self.__output_json_dir: str = None
+
+    def __set_training_param(self, epochs : int, accumulate : int) -> None:
+        # Prepare the model, optimizer and LR scheduler for a training run.
+        # 'accumulate' is the number of mini-batches over which gradients are
+        # accumulated; here it is only used to scale the weight decay.
+        # self.__lr_lambda = lambda x : ((1 - math.cos(x * math.pi / epochs)) / 2 ) * (0.1 - 1.0) + 1.0
+        # Linear decay of the LR multiplier from 1.0 down to 0.01 over the run.
+        self.__lr_lambda = lambda x: (1 - x / (epochs - 1)) * (1.0 - 0.01) + 0.01
+        # Auto-generate anchor boxes that best fit the training set:
+        # 9 anchors for full YOLOv3, 6 for TinyYOLOv3.
+        self.__anchors = generate_anchors(
+            self.__custom_train_dataset,
+            n=9 if self.__model_type=="yolov3" else 6
+        )
+        self.__anchors = [round(i) for i in self.__anchors.reshape(-1).tolist()]
+        if self.__model_type == "yolov3":
+            self.__model = YoloV3(
+                num_classes=self.__num_classes,
+                anchors=self.__anchors,
+                device=self.__device
+            )
+        elif self.__model_type == "tiny-yolov3":
+            self.__model = YoloV3Tiny(
+                num_classes=self.__num_classes,
+                anchors=self.__anchors,
+                device=self.__device
+            )
+        # Initialize from pre-trained weights when transfer learning was requested.
+        if self.__model_path:
+            self.__load_model()
+
+        w_d = (5e-4) * (self.__mini_batch_size * accumulate / 64) # scale weight decay
+        g0, g1, g2 = [], [], [] # optimizer parameter groups
+        for m in self.__model.modules():
+            if hasattr(m, 'bias') and isinstance(m.bias, torch.nn.Parameter): # bias
+                g2.append(m.bias)
+            if isinstance(m, torch.nn.BatchNorm2d): # weight (no decay)
+                g0.append(m.weight)
+            elif hasattr(m, 'weight') and isinstance(m.weight, torch.nn.Parameter): # weight (with decay)
+                g1.append(m.weight)
+
+        # Three parameter groups: BatchNorm weights (g0, no decay), other
+        # weights (g1, decayed), and biases (g2, no decay).
+        self.__optimizer = SGD(
+            g0,
+            lr=1e-2,
+            momentum=0.6,
+            # weight_decay=w_d,
+            nesterov=True
+        )
+        self.__optimizer.add_param_group({'params': g1, 'weight_decay': w_d}) # add g1 with weight_decay
+        self.__optimizer.add_param_group({'params': g2}) # add g2 (biases)
+        self.__lr_scheduler = lr_scheduler.LambdaLR(
+            self.__optimizer,
+            lr_lambda=self.__lr_lambda
+        )
+        del g0, g1, g2
+        self.__model.to(self.__device)
+
+    def __load_model(self) -> None:
+        # Best-effort load of pre-trained weights for transfer learning; falls
+        # back to the model's random initialization if loading fails for any reason.
+        try:
+            state_dict = torch.load(self.__model_path, map_location=self.__device)
+            # check against cases where number of classes differs, causing the
+            # channel of the convolutional layer just before the detection layer
+            # to differ.
+            new_state_dict = {k:v for k,v in state_dict.items() if k in self.__model.state_dict().keys() and v.shape==self.__model.state_dict()[k].shape}
+            # strict=False: layers filtered out above keep their random init.
+            self.__model.load_state_dict(new_state_dict, strict=False)
+            print("="*20)
+            print("Pretrained YOLOv3 model loaded to initialize weights")
+            print("="*20)
+        except Exception as e:
+            print("="*20)
+            print("pretrained weight loading failed. Defaulting to using random weight.")
+            print("="*20)
+
+    def __load_data(self) -> None:
+        # Build the train/validation datasets and DataLoaders from the
+        # YOLO-format dataset directory set in setDataDirectory().
+        self.__num_classes = len(self.__classes)
+        # Dataset name is the directory's base name (trailing separator added
+        # so os.path.dirname yields the directory itself).
+        self.__dataset_name = os.path.basename(os.path.dirname(self.__data_dir+os.path.sep))
+        self.__custom_train_dataset = LoadImagesAndLabels(self.__data_dir, train=True)
+        self.__custom_val_dataset = LoadImagesAndLabels(self.__data_dir, train=False)
+        self.__train_loader = DataLoader(
+            self.__custom_train_dataset, batch_size=self.__mini_batch_size,
+            shuffle=True,
+            collate_fn=self.__custom_train_dataset.collate_fn
+        )
+        # Validation runs with half the training batch size.
+        self.__val_loader = DataLoader(
+            self.__custom_val_dataset, batch_size=self.__mini_batch_size//2,
+            shuffle=True, collate_fn=self.__custom_val_dataset.collate_fn
+        )
+
+    def setModelTypeAsYOLOv3(self) -> None:
+        """
+        'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model.
+        Call this before training so the trainer builds the full YOLOv3 network.
+        :return: None
+        """
+        self.__model_type = "yolov3"
+
+    def setModelTypeAsTinyYOLOv3(self) -> None:
+        """
+        'setModelTypeAsTinyYOLOv3()' is used to set the model type to the TinyYOLOv3 model.
+        Call this before training so the trainer builds the smaller TinyYOLOv3 network.
+        :return: None
+        """
+        self.__model_type = "tiny-yolov3"
+
+    def setDataDirectory(self, data_directory: str):
+        """
+        'setDataDirectory()' is required to set the path to which the data/dataset to be used for training is kept. The input dataset must be in the YOLO format. The directory can have any name, but it must have 'train' and 'validation'
+        sub-directory. In the 'train' and 'validation' sub-directories, there must be 'images' and 'annotations'
+        sub-directories respectively. The 'images' folder will contain the pictures for the dataset and the
+        'annotations' folder will contain the TXT files with details of the annotations for each image in the
+        'images folder'.
+        N.B: Strictly take note that the filenames (without the extension) of the pictures in the 'images folder'
+        must be the same as the filenames (except the extension) of their corresponding annotation TXT files in
+        the 'annotations' folder.
+        The structure of the 'train' and 'validation' folder must be as follows:
+        >> train >> images >> img_1.jpg
+                 >> images >> img_2.jpg
+                 >> images >> img_3.jpg
+                 >> annotations >> img_1.txt
+                 >> annotations >> img_2.txt
+                 >> annotations >> img_3.txt
+
+        >> validation >> images >> img_151.jpg
+                      >> images >> img_152.jpg
+                      >> images >> img_153.jpg
+                      >> annotations >> img_151.txt
+                      >> annotations >> img_152.txt
+                      >> annotations >> img_153.txt
+        :param data_directory: path to the root of the YOLO-format dataset directory
+        :raises ValueError: if 'data_directory' does not point to an existing directory
+        :return:
+        """
+        if os.path.isdir(data_directory):
+            self.__data_dir = data_directory
+        else:
+            raise ValueError(
+                "The parameter passed should point to a valid directory"
+            )
    def setTrainConfig(self, object_names_array: List[str], batch_size: int=4, num_experiments: int = 100, train_from_pretrained_model: str = None):
        """
        'setTrainConfig()' function allows you to set the properties for the training instances. It accepts the following values:
        - object_names_array , this is an array of the names of the different objects in your dataset, in the index order your dataset is annotated
        - batch_size (optional), this is the batch size for the training instance
        - num_experiments (optional), also known as epochs, it is the number of times the network will train on all the training dataset
        - train_from_pretrained_model (optional), this is used to perform transfer learning by specifying the path to a pre-trained YOLOv3 or TinyYOLOv3 model
        :param object_names_array:
        :param batch_size:
        :param num_experiments:
        :param train_from_pretrained_model:
        :return:
        """
        # Optional transfer-learning checkpoint; validate its file extension before use.
        self.__model_path = train_from_pretrained_model
        if self.__model_path:
            extension_check(self.__model_path)
        self.__classes = object_names_array
        self.__mini_batch_size = batch_size
        self.__epochs = num_experiments
        # NOTE(review): assumes setDataDirectory() has already been called so that
        # self.__data_dir is set — confirm call-order contract with callers.
        self.__output_models_dir = os.path.join(self.__data_dir, "models")
        self.__output_json_dir = os.path.join(self.__data_dir, "json")
+
    def trainModel(self) -> None:
        """
        'trainModel()' function starts the actual model training. Once the training starts, the training instance
        creates 3 sub-folders in your dataset folder which are:
        - json, where the JSON configuration file for using your trained model is stored
        - models, where your trained models are stored once they are generated after each improved experiments
        - cache , where temporary training configuration files are stored
        :return:
        """

        self.__load_data()
        os.makedirs(self.__output_models_dir, exist_ok=True)
        os.makedirs(self.__output_json_dir, exist_ok=True)

        # Running metrics: mean precision/recall, mAP@0.5, mAP@0.5:0.95, best mAP seen.
        mp, mr, map50, map50_95, best_fitness = 0, 0, 0, 0, 0.0
        nbs = 64 # nominal batch size
        nb = len(self.__train_loader) # number of batches
        nw = max(3 * nb, 1000) # number of warmup iterations.
        last_opt_step = -1
        prev_save_name, recent_save_name = "", ""

        # Accumulate gradients over several mini-batches so the effective batch
        # size approximates the nominal batch size of 64.
        accumulate = max(round(nbs / self.__mini_batch_size), 1) # accumulate loss before optimizing.

        self.__set_training_param(self.__epochs, accumulate)

        # Persist the label names and anchors needed later at inference time.
        with open(os.path.join(self.__output_json_dir, f"{self.__dataset_name}_{self.__model_type}_detection_config.json"), "w") as configWriter:
            json.dump(
                {
                    "labels": self.__classes,
                    "anchors": self.__anchors
                },
                configWriter
            )

        since = time.time()

        self.__lr_scheduler.last_epoch = -1

        for epoch in range(1, self.__epochs+1):
            self.__optimizer.zero_grad()
            # Running mean of the (box, objectness, class) loss components.
            mloss = torch.zeros(3, device=self.__device)
            print(f"Epoch {epoch}/{self.__epochs}", "-"*10, sep="\n")

            for phase in ["train", "validation"]:
                if phase=="train":
                    self.__model.train()
                    print("Train: ")
                    for batch_i, (data, anns) in tqdm(enumerate(self.__train_loader)):
                        batches_done = batch_i + nb * epoch

                        data = data.to(self.__device)
                        anns = anns.to(self.__device)

                        # warmup: ramp lr/momentum and the accumulation count
                        # linearly over the first nw batches.
                        if batches_done <= nw:
                            xi = [0, nw] # x interp
                            accumulate = max(1, np.interp(batches_done, xi, [1, nbs / self.__mini_batch_size]).round())
                            for j, x in enumerate(self.__optimizer.param_groups):
                                # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                                x['lr'] = np.interp(batches_done, xi, [0.1 if j == 2 else 0.0, 0.01 * self.__lr_lambda(epoch)])
                                if 'momentum' in x:
                                    x['momentum'] = np.interp(batches_done, xi, [0.8, 0.9])

                        # Mixed-precision forward pass (enabled only when CUDA is used).
                        with amp.autocast(enabled=self.__cuda):
                            _ = self.__model(data)
                            loss_layers = self.__model.get_loss_layers()
                            loss, loss_components = compute_loss(loss_layers, anns.detach(), self.__device)

                        self.__scaler.scale(loss).backward()
                        mloss = (mloss * batch_i + loss_components) / (batch_i + 1)

                        # Optimize once enough gradients have been accumulated.
                        if batches_done - last_opt_step >= accumulate:
                            self.__scaler.step(self.__optimizer) # optimizer.step
                            self.__scaler.update()
                            self.__optimizer.zero_grad()
                            last_opt_step = batches_done

                    print(f" box loss-> {float(mloss[0]):.5f}, object loss-> {float(mloss[1]):.5f}, class loss-> {float(mloss[2]):.5f}")

                    self.__lr_scheduler.step()

                else:
                    self.__model.eval()
                    print("Validation:")

                    mp, mr, map50, map50_95 = validate.run(
                        self.__model, self.__val_loader,
                        self.__num_classes, device=self.__device
                    )

                    print(f" recall: {mr:0.6f} precision: {mp:0.6f} mAP@0.5: {map50:0.6f}, mAP@0.5-0.95: {map50_95:0.6f}" "\n")

                    # Keep only the best checkpoint (ranked by mAP@0.5): delete the
                    # previously-best weights before saving the new best.
                    if map50 > best_fitness:
                        best_fitness = map50
                        recent_save_name = self.__model_type+f"_{self.__dataset_name}_mAP-{best_fitness:0.5f}_epoch-{epoch}.pt"
                        if prev_save_name:
                            os.remove(os.path.join(self.__output_models_dir, prev_save_name))
                        torch.save(
                            self.__model.state_dict(),
                            os.path.join(self.__output_models_dir, recent_save_name)
                        )
                        prev_save_name = recent_save_name

            # Always save the weights from the final epoch, regardless of fitness.
            if epoch == self.__epochs:
                torch.save(
                    self.__model.state_dict(),
                    os.path.join(self.__output_models_dir, self.__model_type+f"_{self.__dataset_name}_last.pt")
                )

        elapsed_time = time.time() - since
        print(f"Training completed in {elapsed_time//60:.0f}m {elapsed_time % 60:.0f}s")
        torch.cuda.empty_cache()
+
+
class CustomObjectDetection:
    """
    This is the object detection class for using your custom trained models.
    It supports your custom trained YOLOv3 and TinyYOLOv3 models and allows
    you to perform object detection in images.
    """
    def __init__(self) -> None:
        # Prefer GPU when available; can be overridden later via useCPU().
        self.__device = "cuda" if torch.cuda.is_available() else "cpu"
        self.__anchors: List[int] = None        # anchor boxes, loaded from the JSON config
        self.__classes: List[str] = None        # class labels, loaded from the JSON config
        self.__model = None                     # YoloV3 / YoloV3Tiny instance once loaded
        self.__model_loaded: bool = False       # True after loadModel() succeeds
        self.__model_path: str = None           # path to the .pt weights file
        self.__json_path: str = None            # path to the detection_config.json
        self.__model_type: str = None           # "yolov3" or "tiny-yolov3"
        self.__nms_score = 0.4                  # default non-max-suppression threshold
        self.__objectness_score = 0.4           # default objectness threshold
+
+ def setModelTypeAsYOLOv3(self) -> None:
+ """
+ 'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model.
+ :return:
+ """
+ self.__model_type = "yolov3"
+
+ def setModelTypeAsTinyYOLOv3(self) -> None:
+ """
+ 'setModelTypeAsTinyYOLOv3()' is used to set the model type to the TinyYOLOv3 model.
+ :return:
+ """
+ self.__model_type = "tiny-yolov3"
+
+ def setModelPath(self, model_path: str):
+ if os.path.isfile(model_path):
+ extension_check(model_path)
+ self.__model_path = model_path
+ self.__model_loaded = False
+ else:
+ raise ValueError(
+ "invalid path, path not pointing to the weightfile."
+ ) from None
+ self.__model_path = model_path
+
    def setJsonPath(self, configuration_json: str):
        """Sets the path to the detection_config.json produced during training (labels + anchors)."""
        self.__json_path = configuration_json
+
+ def __load_classes_and_anchors(self) -> List[str]:
+
+ with open(self.__json_path) as f:
+ json_config = json.load(f)
+ self.__anchors = json_config["anchors"]
+ self.__classes = json_config["labels"]
+
+ def __load_image_yolo(self, input_image : Union[str, np.ndarray, Image.Image]) -> Tuple[List[str], List[np.ndarray], torch.Tensor, torch.Tensor]:
+ """
+ Loads image/images from the given path. If the given path is a directory,
+ this function only load the images in the directory (it does noot visit the
+ subdirectories).
+ """
+ allowed_exts = ["jpg", "jpeg", "png"]
+ fnames = []
+ original_dims = []
+ inputs = []
+ original_imgs = []
+ if type(input_image) == str:
+ if os.path.isfile(input_image):
+ if input_image.rsplit('.')[-1].lower() in allowed_exts:
+ img = cv2.imread(input_image)
+ else:
+ raise ValueError(f"image path '{input_image}' is not found or a valid file")
+ elif type(input_image) == np.ndarray:
+ img = input_image
+ elif "PIL" in str(type(input_image)):
+ img = np.asarray(input_image)
+ else:
+ raise ValueError(f"Invalid image input format")
+
+ img_h, img_w, _ = img.shape
+
+ original_imgs.append(np.array(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).astype(np.uint8))
+ original_dims.append((img_w, img_h))
+ if type(input_image) == str:
+ fnames.append(os.path.basename(input_image))
+ else:
+ fnames.append("")
+ inputs.append(prepare_image(img, (416, 416)))
+
+ if original_dims:
+ return (
+ fnames,
+ original_imgs,
+ torch.FloatTensor(original_dims).repeat(1,2).to(self.__device),
+ torch.cat(inputs, 0).to(self.__device)
+ )
+ raise RuntimeError(
+ f"Error loading image."
+ "\nEnsure the file is a valid image,"
+ " allowed file extensions are .jpg, .jpeg, .png"
+ )
+
+ def useCPU(self):
+ """
+ Used to force classification to be done on CPU.
+ By default, classification will occur on GPU compute if available else CPU compute.
+ """
+
+ self.__device = "cpu"
+ if self.__model_loaded:
+ self.__model_loaded = False
+ self.loadModel()
+
    def loadModel(self) -> None:
        """
        Loads the pretrained weights in the specified model path.

        Requires setModelPath(), setJsonPath() and one of the model-type setters
        to have been called first.

        :raises ValueError: if no model type has been set
        :raises RuntimeError: if the weights file does not match the built network
        """
        # Populates self.__anchors / self.__classes from the JSON config.
        self.__load_classes_and_anchors()

        if self.__model_type == "yolov3":
            self.__model = YoloV3(
                anchors=self.__anchors,
                num_classes=len(self.__classes),
                device=self.__device
            )
        elif self.__model_type == "tiny-yolov3":
            self.__model = YoloV3Tiny(
                anchors=self.__anchors,
                num_classes=len(self.__classes),
                device=self.__device
            )
        else:
            raise ValueError(f"Invalid model type. Call setModelTypeAsYOLOv3() or setModelTypeAsTinyYOLOv3() to set a model type before loading the model")

        self.__model.to(self.__device)

        state_dict = torch.load(self.__model_path, map_location=self.__device)
        try:
            self.__model.load_state_dict(state_dict)
            self.__model_loaded = True
            # NOTE(review): .to(self.__device) repeats the move above — harmless but redundant.
            self.__model.to(self.__device).eval()
        except Exception as e:
            raise RuntimeError(f"Invalid weights!!! {e}")
+
+
    def detectObjectsFromImage(self,
                input_image: Union[str, np.ndarray, Image.Image],
                output_image_path: str=None,
                output_type: str ="file",
                extract_detected_objects: bool=False, minimum_percentage_probability: int=40,
                display_percentage_probability: bool=True, display_object_name: bool=True,
                display_box: bool=True,
                custom_objects: List=None,
                nms_treshold: float= 0.4,
                objectness_treshold: float= 0.4,
            ) -> Union[List[List[Tuple[str, float, Dict[str, int]]]], np.ndarray, List[np.ndarray], List[str]]:
        """
        Detects objects in an image using the classes of the loaded custom model.

        :param input_image: path to an image file, cv2 image or PIL image
        :param output_image_path: path to save the input image with predictions rendered
        :param output_type: type of output for the rendered image. Acceptable values are 'file' and 'array' (a cv2 image)
        :param extract_detected_objects: extract each detected object based on the output type
        :param minimum_percentage_probability: the minimum confidence a detected object must have
        :param display_percentage_probability: to display/not display the confidence on the rendered image
        :param display_object_name: to display/not display the object name on the rendered image
        :param display_box: to display/not display the object bounding box on the rendered image
        :param custom_objects: a dictionary of detectable objects set to boolean values
        :param nms_treshold: non-max-suppression threshold (note: parameter name kept as-is for backward compatibility)
        :param objectness_treshold: objectness threshold (name kept as-is for backward compatibility)

        :returns: a list of prediction dicts (name, percentage_probability, box_points);
                  with output_type='array' the rendered image is returned first, and with
                  extract_detected_objects=True the extracted crops (paths or arrays) are appended.
        """

        self.__nms_score = nms_treshold
        self.__objectness_score = objectness_treshold

        self.__model.eval()
        if not self.__model_loaded:
            if self.__model_path:
                warnings.warn(
                    "Model path has changed but pretrained weights in the"
                    " new path is yet to be loaded.",
                    ResourceWarning
                )
            else:
                raise RuntimeError(
                    "Model path isn't set, pretrained weights aren't used."
                )

        # Maps batch index -> list of (label, confidence, bbox-dict) tuples.
        predictions = defaultdict(lambda : [])


        if self.__model_type == "yolov3" or self.__model_type == "tiny-yolov3":
            fnames, original_imgs, input_dims, imgs = self.__load_image_yolo(input_image)

            with torch.no_grad():
                output = self.__model(imgs)

            # Filter raw network output by objectness and apply NMS.
            output = get_predictions(
                pred=output.to(self.__device), num_classes=len(self.__classes),
                nms_confidence_level=self.__nms_score, objectness_confidence= self.__objectness_score,
                device=self.__device
            )

            # No detections: return empty results shaped to match the requested output type.
            if output is None:
                if output_type == "array":
                    if extract_detected_objects:
                        return original_imgs[0], [], []
                    else:
                        return original_imgs[0], []
                else:
                    if extract_detected_objects:
                        return original_imgs[0], []
                    else:
                        return []

            # scale the output to match the dimension of the original image
            input_dims = torch.index_select(input_dims, 0, output[:, 0].long())
            scaling_factor = torch.min(416 / input_dims, 1)[0].view(-1, 1)
            output[:, [1,3]] -= (416 - (scaling_factor * input_dims[:, 0].view(-1,1))) / 2
            output[:, [2,4]] -= (416 - (scaling_factor * input_dims[:, 1].view(-1,1))) / 2
            output[:, 1:5] /= scaling_factor

            # clip bounding boxes for those that extended outside the detected image.
            for idx in range(output.shape[0]):
                output[idx, [1,3]] = torch.clamp(output[idx, [1,3]], 0.0, input_dims[idx, 0])
                output[idx, [2,4]] = torch.clamp(output[idx, [2,4]], 0.0, input_dims[idx, 1])

            for pred in output:
                pred_label = self.__classes[int(pred[-1])]
                # When custom_objects is supplied, keep only labels explicitly enabled
                # (label keys use underscores instead of spaces).
                if custom_objects:
                    if pred_label.replace(" ", "_") in custom_objects.keys():
                        if not custom_objects[pred_label.replace(" ", "_")]:
                            continue
                    else:
                        continue
                predictions[int(pred[0])].append((
                    pred_label,
                    float(pred[-2]),
                    {k:v for k,v in zip(["x1", "y1", "x2", "y2"], map(int, pred[1:5]))},
                ))

        # Render detection on copy of input image
        original_input_image = None
        output_image_array = None
        extracted_objects = []

        if self.__model_type == "yolov3" or self.__model_type == "tiny-yolov3":
            original_input_image = cv2.cvtColor(original_imgs[0], cv2.COLOR_RGB2BGR)
            if isinstance(output, torch.Tensor):
                for pred in output:
                    percentage_conf = round(float(pred[-2]) * 100, 2)
                    if percentage_conf < minimum_percentage_probability:
                        continue

                    displayed_label = ""
                    if display_object_name:
                        displayed_label = f"{self.__classes[int(pred[-1].item())]} : "
                    if display_percentage_probability:
                        displayed_label += f" {percentage_conf}%"


                    original_imgs[int(pred[0].item())] = draw_bbox_and_label(pred[1:5].int() if display_box else None,
                                    displayed_label,
                                    original_imgs[int(pred[0].item())]
                                )
            output_image_array = cv2.cvtColor(original_imgs[0], cv2.COLOR_RGB2BGR)

        # Format predictions for function response
        predictions_batch = list(predictions.values())
        predictions_list = predictions_batch[0] if len(predictions_batch) > 0 else []
        min_probability = minimum_percentage_probability / 100


        if output_type == "file":
            if output_image_path:
                cv2.imwrite(output_image_path, output_image_array)

                if extract_detected_objects:
                    # Crops are written to "<output-name>-extracted/<output-name>-N.jpg".
                    extraction_dir = ".".join(output_image_path.split(".")[:-1]) + "-extracted"
                    os.mkdir(extraction_dir)
                    count = 0
                    for obj_prediction in predictions_list:
                        if obj_prediction[1] >= min_probability:
                            count += 1
                            extracted_path = os.path.join(
                                extraction_dir,
                                ".".join(os.path.basename(output_image_path).split(".")[:-1]) + f"-{count}.jpg"
                            )
                            obj_bbox = obj_prediction[2]
                            cv2.imwrite(extracted_path, original_input_image[obj_bbox["y1"] : obj_bbox["y2"], obj_bbox["x1"] : obj_bbox["x2"]])

                            extracted_objects.append(extracted_path)

        elif output_type == "array":
            if extract_detected_objects:
                for obj_prediction in predictions_list:
                    if obj_prediction[1] >= min_probability:
                        obj_bbox = obj_prediction[2]

                        extracted_objects.append(original_input_image[obj_bbox["y1"] : obj_bbox["y2"], obj_bbox["x1"] : obj_bbox["x2"]])
        else:
            raise ValueError(f"Invalid output_type '{output_type}'. Supported values are 'file' and 'array' ")


        # Convert tuples to the documented dict shape, filtering by minimum probability.
        predictions_list = [
            {
                "name": prediction[0], "percentage_probability": round(prediction[1] * 100, 2),
                "box_points": [prediction[2]["x1"], prediction[2]["y1"], prediction[2]["x2"], prediction[2]["y2"]]
            } for prediction in predictions_list if prediction[1] >= min_probability
        ]


        if output_type == "array":
            if extract_detected_objects:
                return output_image_array, predictions_list, extracted_objects
            else:
                return output_image_array, predictions_list
        else:
            if extract_detected_objects:
                return predictions_list, extracted_objects
            else:
                return predictions_list
+
+
class CustomVideoObjectDetection:
    """
    This is the custom objects detection class for videos and camera live stream inputs in the ImageAI library. It provides support for YOLOv3 and TinyYOLOv3 object detection networks. After instantiating this class, you can set it's properties and
    make object detections using it's pre-defined functions.
    The following functions are required to be called before object detection can be made
    * setModelPath()
    * setJsonPath()
    * At least one of the following, and it must correspond to the model set in setModelPath():
    [setModelTypeAsYOLOv3(), setModelTypeAsTinyYOLOv3()]
    * loadModel() [This must be called once only before performing object detection]
    Once the above functions have been called, you can call the detectObjectsFromVideo() function
    of the object detection instance object at anytime to obtain observable objects in any video
    or camera live stream.
    """

    def __init__(self):
        # All detection work is delegated to a wrapped single-image detector.
        self.__detector = CustomObjectDetection()
+
    def setModelTypeAsYOLOv3(self):
        """Sets the wrapped detector's model type to YOLOv3."""
        self.__detector.setModelTypeAsYOLOv3()

    def setModelTypeAsTinyYOLOv3(self):
        """Sets the wrapped detector's model type to TinyYOLOv3."""
        self.__detector.setModelTypeAsTinyYOLOv3()

    def setModelPath(self, model_path: str):
        """Validates the weights file extension and forwards the path to the wrapped detector."""
        extension_check(model_path)
        self.__detector.setModelPath(model_path)

    def setJsonPath(self, configuration_json: str):
        """Forwards the detection_config.json path to the wrapped detector."""
        self.__detector.setJsonPath(configuration_json)

    def loadModel(self):
        """Loads the wrapped detector's model; call once before detection."""
        self.__detector.loadModel()

    def useCPU(self):
        """Forces the wrapped detector to run on CPU."""
        self.__detector.useCPU()
+
+ def detectObjectsFromVideo(self, input_file_path="", camera_input=None, output_file_path="", frames_per_second=20,
+ frame_detection_interval=1, minimum_percentage_probability=40, log_progress=False,
+ display_percentage_probability=True, display_object_name=True, display_box=True, save_detected_video=True,
+ per_frame_function=None, per_second_function=None, per_minute_function=None,
+ video_complete_function=None, return_detected_frame=False, detection_timeout = None):
+
+ """
+ 'detectObjectsFromVideo()' function is used to detect objects observable in the given video path or a camera input:
+ * input_file_path , which is the file path to the input video. It is required only if 'camera_input' is not set
+ * camera_input , allows you to parse in camera input for live video detections
+ * output_file_path , which is the path to the output video. It is required only if 'save_detected_video' is not set to False
+ * frames_per_second , which is the number of frames to be used in the output video
+ * frame_detection_interval (optional, 1 by default) , which is the intervals of frames that will be detected.
+ * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
+ * log_progress (optional) , which states if the progress of the frame processed is to be logged to console
+ * display_percentage_probability (optional), can be used to hide or show probability scores on the detected video frames
+ * display_object_name (optional), can be used to show or hide object names on the detected video frames
+ * save_save_detected_video (optional, True by default), can be set to or not to save the detected video
+ * per_frame_function (optional), this parameter allows you to parse in a function you will want to execute after each frame of the video is detected. If this parameter is set to a function, after every video frame is detected, the function will be executed with the following values parsed into it:
+ -- position number of the frame
+ -- an array of dictinaries, with each dictionary corresponding to each object detected. Each dictionary contains 'name', 'percentage_probability' and 'box_points'
+ -- a dictionary with with keys being the name of each unique objects and value are the number of instances of the object present
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed as the fourth value into the function
+ * per_second_function (optional), this parameter allows you to parse in a function you will want to execute after each second of the video is detected. If this parameter is set to a function, after every second of a video is detected, the function will be executed with the following values parsed into it:
+ -- position number of the second
+ -- an array of dictionaries whose keys are position number of each frame present in the last second , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+ -- an array of dictionaries, with each dictionary corresponding to each frame in the past second, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+ -- a dictionary with its keys being the name of each unique object detected throughout the past second, and the key values are the average number of instances of the object found in all the frames contained in the past second
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
+ as the fifth value into the function
+ * per_minute_function (optional), this parameter allows you to parse in a function you will want to execute after each minute of the video is detected. If this parameter is set to a function, after every minute of a video is detected, the function will be executed with the following values parsed into it:
+ -- position number of the minute
+ -- an array of dictionaries whose keys are position number of each frame present in the last minute , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+ -- an array of dictionaries, with each dictionary corresponding to each frame in the past minute, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+ -- a dictionary with its keys being the name of each unique object detected throughout the past minute, and the key values are the average number of instances of the object found in all the frames contained in the past minute
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed as the fifth value into the function
+ * video_complete_function (optional), this parameter allows you to parse in a function you will want to execute after all of the video frames have been detected. If this parameter is set to a function, after all of frames of a video is detected, the function will be executed with the following values parsed into it:
+ -- an array of dictionaries whose keys are position number of each frame present in the entire video , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+ -- an array of dictionaries, with each dictionary corresponding to each frame in the entire video, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+ -- a dictionary with its keys being the name of each unique object detected throughout the entire video, and the key values are the average number of instances of the object found in all the frames contained in the entire video
+ * return_detected_frame (optionally, False by default), option to obtain the return the last detected video frame into the per_per_frame_function, per_per_second_function or per_per_minute_function
+ * detection_timeout (optionally, None by default), option to state the number of seconds of a video that should be detected after which the detection function stop processing the video
+ * thread_safe (optional, False by default), enforce the loaded detection model works across all threads if set to true, made possible by forcing all Tensorflow inference to run on the default graph.
+ :param input_file_path:
+ :param camera_input
+ :param output_file_path:
+ :param save_detected_video:
+ :param frames_per_second:
+ :param frame_detection_interval:
+ :param minimum_percentage_probability:
+ :param log_progress:
+ :param display_percentage_probability:
+ :param display_object_name:
+ :param per_frame_function:
+ :param per_second_function:
+ :param per_minute_function:
+ :param video_complete_function:
+ :param return_detected_frame:
+ :param detection_timeout:
+ :param thread_safe:
+ :return output_video_filepath:
+ :return counting:
+ :return output_objects_array:
+ :return output_objects_count:
+ :return detected_copy:
+ :return this_second_output_object_array:
+ :return this_second_counting_array:
+ :return this_second_counting:
+ :return this_minute_output_object_array:
+ :return this_minute_counting_array:
+ :return this_minute_counting:
+ :return this_video_output_object_array:
+ :return this_video_counting_array:
+ :return this_video_counting:
+ """
+
+ if (input_file_path == "" and camera_input == None):
+ raise ValueError(
+ "You must set 'input_file_path' to a valid video file, or set 'camera_input' to a valid camera")
+ elif (save_detected_video == True and output_file_path == ""):
+ raise ValueError(
+ "You must set 'output_video_filepath' to a valid video file name, in which the detected video will be saved. If you don't intend to save the detected video, set 'save_detected_video=False'")
+
+ else:
+
+ output_frames_dict = {}
+ output_frames_count_dict = {}
+
+ input_video = cv2.VideoCapture(input_file_path)
+ if (camera_input != None):
+ input_video = camera_input
+
+ output_video_filepath = output_file_path + '.mp4'
+
+ frame_width = int(input_video.get(3))
+ frame_height = int(input_video.get(4))
+ output_video = cv2.VideoWriter(output_video_filepath, cv2.VideoWriter_fourcc(*"MP4V"),
+ frames_per_second,
+ (frame_width, frame_height))
+
+ counting = 0
+
+ detection_timeout_count = 0
+ video_frames_count = 0
+
+ while (input_video.isOpened()):
+ ret, frame = input_video.read()
+
+ if (ret == True):
+
+ video_frames_count += 1
+ if (detection_timeout != None):
+ if ((video_frames_count % frames_per_second) == 0):
+ detection_timeout_count += 1
+
+ if (detection_timeout_count >= detection_timeout):
+ break
+
+ output_objects_array = []
+
+ counting += 1
+
+ if (log_progress == True):
+ print("Processing Frame : ", str(counting))
+
+ detected_copy = frame.copy()
+
+ check_frame_interval = counting % frame_detection_interval
+
+ if (counting == 1 or check_frame_interval == 0):
+ try:
+ detected_copy, output_objects_array = self.__detector.detectObjectsFromImage(
+ input_image=frame, output_type="array",
+ minimum_percentage_probability=minimum_percentage_probability,
+ display_percentage_probability=display_percentage_probability,
+ display_object_name=display_object_name,
+ display_box=display_box)
+
+ except Exception as e:
+ warnings.warn()
+
+ if (save_detected_video == True):
+ output_video.write(detected_copy)
+
+ if detected_copy is not None and output_objects_array is not None:
+
+ output_frames_dict[counting] = output_objects_array
+
+ output_objects_count = {}
+ for eachItem in output_objects_array:
+ eachItemName = eachItem["name"]
+ try:
+ output_objects_count[eachItemName] = output_objects_count[eachItemName] + 1
+ except:
+ output_objects_count[eachItemName] = 1
+
+ output_frames_count_dict[counting] = output_objects_count
+
+ if (counting == 1 or check_frame_interval == 0):
+ if (per_frame_function != None):
+ if (return_detected_frame == True):
+ per_frame_function(counting, output_objects_array, output_objects_count,
+ detected_copy)
+ elif (return_detected_frame == False):
+ per_frame_function(counting, output_objects_array, output_objects_count)
+
+ if (per_second_function != None):
+ if (counting != 1 and (counting % frames_per_second) == 0):
+
+ this_second_output_object_array = []
+ this_second_counting_array = []
+ this_second_counting = {}
+
+ for aa in range(counting):
+ if (aa >= (counting - frames_per_second)):
+ this_second_output_object_array.append(output_frames_dict[aa + 1])
+ this_second_counting_array.append(output_frames_count_dict[aa + 1])
+
+ for eachCountingDict in this_second_counting_array:
+ for eachItem in eachCountingDict:
+ try:
+ this_second_counting[eachItem] = this_second_counting[eachItem] + \
+ eachCountingDict[eachItem]
+ except:
+ this_second_counting[eachItem] = eachCountingDict[eachItem]
+
+ for eachCountingItem in this_second_counting:
+ this_second_counting[eachCountingItem] = int(this_second_counting[eachCountingItem] / frames_per_second)
+
+ if (return_detected_frame == True):
+ per_second_function(int(counting / frames_per_second),
+ this_second_output_object_array, this_second_counting_array,
+ this_second_counting, detected_copy)
+
+ elif (return_detected_frame == False):
+ per_second_function(int(counting / frames_per_second),
+ this_second_output_object_array, this_second_counting_array,
+ this_second_counting)
+
+ if (per_minute_function != None):
+
+ if (counting != 1 and (counting % (frames_per_second * 60)) == 0):
+
+ this_minute_output_object_array = []
+ this_minute_counting_array = []
+ this_minute_counting = {}
+
+ for aa in range(counting):
+ if (aa >= (counting - (frames_per_second * 60))):
+ this_minute_output_object_array.append(output_frames_dict[aa + 1])
+ this_minute_counting_array.append(output_frames_count_dict[aa + 1])
+
+ for eachCountingDict in this_minute_counting_array:
+ for eachItem in eachCountingDict:
+ try:
+ this_minute_counting[eachItem] = this_minute_counting[eachItem] + \
+ eachCountingDict[eachItem]
+ except:
+ this_minute_counting[eachItem] = eachCountingDict[eachItem]
+
+ for eachCountingItem in this_minute_counting:
+ this_minute_counting[eachCountingItem] = int(this_minute_counting[eachCountingItem] / (frames_per_second * 60))
+
+ if (return_detected_frame == True):
+ per_minute_function(int(counting / (frames_per_second * 60)),
+ this_minute_output_object_array, this_minute_counting_array,
+ this_minute_counting, detected_copy)
+
+ elif (return_detected_frame == False):
+ per_minute_function(int(counting / (frames_per_second * 60)),
+ this_minute_output_object_array, this_minute_counting_array,
+ this_minute_counting)
+ else:
+ break
+
+ if (video_complete_function != None):
+
+ this_video_output_object_array = []
+ this_video_counting_array = []
+ this_video_counting = {}
+
+ for aa in range(counting):
+ this_video_output_object_array.append(output_frames_dict[aa + 1])
+ this_video_counting_array.append(output_frames_count_dict[aa + 1])
+
+ for eachCountingDict in this_video_counting_array:
+ for eachItem in eachCountingDict:
+ try:
+ this_video_counting[eachItem] = this_video_counting[eachItem] + \
+ eachCountingDict[eachItem]
+ except:
+ this_video_counting[eachItem] = eachCountingDict[eachItem]
+
+ for eachCountingItem in this_video_counting:
+ this_video_counting[eachCountingItem] = int(this_video_counting[eachCountingItem] / counting)
+
+ video_complete_function(this_video_output_object_array, this_video_counting_array,
+ this_video_counting)
+
+ input_video.release()
+ output_video.release()
+
+ if (save_detected_video == True):
+ return output_video_filepath
+
+
\ No newline at end of file
diff --git a/imageai/Detection/YOLOv3/__init__.py b/imageai/Detection/Custom/yolo/__init__.py
similarity index 100%
rename from imageai/Detection/YOLOv3/__init__.py
rename to imageai/Detection/Custom/yolo/__init__.py
diff --git a/imageai/Detection/Custom/yolo/compute_loss.py b/imageai/Detection/Custom/yolo/compute_loss.py
new file mode 100644
index 00000000..5233ecc5
--- /dev/null
+++ b/imageai/Detection/Custom/yolo/compute_loss.py
@@ -0,0 +1,199 @@
+import math
+
+import torch
+import torch.nn as nn
+
+# This new loss function is based on https://github.com/ultralytics/yolov3/blob/master/utils/loss.py
+
+def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9):
+ # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
+ # If x1y1x2y2 is False, boxes are given as (cx, cy, w, h) and converted below.
+ # Set exactly one of GIoU / DIoU / CIoU to get the corresponding generalized
+ # metric; otherwise the plain IoU is returned. eps guards divisions by zero.
+ box2 = box2.T
+
+ # Get the coordinates of bounding boxes
+ if x1y1x2y2: # x1, y1, x2, y2 = box1
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
+ else: # transform from xywh to xyxy
+ b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
+ b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
+ b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
+ b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
+
+ # Intersection area
+ inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
+ (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
+
+ # Union Area
+ # NOTE(review): eps is added to the heights only (matches upstream ultralytics
+ # code); widths of degenerate boxes can still be exactly 0 — confirm intended.
+ w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
+ w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
+ union = w1 * h1 + w2 * h2 - inter + eps
+
+ iou = inter / union
+ if GIoU or DIoU or CIoU:
+ # convex (smallest enclosing box) width
+ cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
+ ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height
+ if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
+ c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared
+ rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
+ (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared
+ if DIoU:
+ return iou - rho2 / c2 # DIoU
+ elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
+ # v measures aspect-ratio consistency; alpha is its trade-off weight,
+ # detached from the graph so it acts as a constant during backprop.
+ v = (4 / math.pi ** 2) * \
+ torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
+ with torch.no_grad():
+ alpha = v / ((1 + eps) - iou + v)
+ return iou - (rho2 / c2 + v * alpha) # CIoU
+ else: # GIoU https://arxiv.org/pdf/1902.09630.pdf
+ c_area = cw * ch + eps # convex area
+ return iou - (c_area - union) / c_area # GIoU
+ else:
+ return iou # IoU
+
+
+def compute_loss(loss_layers, targets, device="cpu"):
+ # Compute the YOLOv3 training loss (box regression + objectness + class BCE)
+ # for one batch. loss_layers: yolo detection layers exposing .num_classes,
+ # .anchors and .pred (the raw layer output); targets: tensor of rows
+ # (image_id, class, x, y, w, h) in normalized xywh (see build_targets()).
+ # Returns (total_loss, detached tensor [lbox, lobj, lcls]).
+ nc = loss_layers[0].num_classes
+ # number of yolo layers
+ nl = len(loss_layers)
+ # output at each layer
+ predictions = [layer.pred for layer in loss_layers]
+
+ # placeholders for the losses.
+ lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
+
+ # Build yolo targets
+ tcls, tbox, indices, anchors = build_targets(predictions, targets, loss_layers, device) # targets
+
+ BCEcls = nn.BCEWithLogitsLoss(
+ pos_weight=torch.tensor([1.0], device=device))
+ BCEobj = nn.BCEWithLogitsLoss(
+ pos_weight=torch.tensor([1.0], device=device))
+
+ # Per-layer objectness weights. NOTE(review): assumes exactly 3 yolo layers;
+ # a 4th layer would raise IndexError below — confirm.
+ balance = [4.0, 1.0, 0.4]
+
+ # Calculate losses for each yolo layer
+ for layer_index, layer_predictions in enumerate(predictions):
+ # Get image ids, anchors, grid index i and j for each target in the current yolo layer
+ b, anchor, grid_j, grid_i = indices[layer_index]
+ # Build empty object target tensor with the same shape as the object prediction
+ tobj = torch.zeros_like(layer_predictions[..., 0], device=device) # target obj
+ # Get the number of targets for this layer.
+ # Each target is a label box with some scaling and the association of an anchor box.
+ # Label boxes may be associated to 0 or multiple anchors. So they are multiple times or not at all in the targets.
+ num_targets = b.shape[0]
+ # Check if there are targets for this batch
+ if num_targets:
+ # Load the corresponding values from the predictions for each of the targets
+ ps = layer_predictions[b, anchor, grid_j, grid_i]
+
+ # Regression of the box
+ # Apply sigmoid to xy offset predictions in each cell that has a target
+ pxy = ps[:, :2].sigmoid() * 2 - 0.5
+ # Apply exponent to wh predictions and multiply with the anchor box that matched best with the label for each cell that has a target
+ pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[layer_index]
+ # Build box out of xy and wh
+ pbox = torch.cat((pxy, pwh), 1)
+ # Calculate CIoU or GIoU for each target with the predicted box for its cell + anchor
+ iou = bbox_iou(pbox.T, tbox[layer_index], x1y1x2y2=False, CIoU=True)
+ # The best possible IoU is 1, so to minimize the loss we take 1 - IoU and reduce it with a mean
+ lbox += (1.0 - iou).mean() # iou loss
+
+ # Classification of the objectness
+ # Fill our empty object target tensor with the IoU we just calculated for each target at the targets position
+ tobj[b, anchor, grid_j, grid_i] = iou.detach().clamp(0).type(tobj.dtype) # Use cells with iou > 0 as object targets
+
+ # Classification of the class
+ # Check if we need to do a classification (number of classes > 1)
+ if nc > 1:
+ # One-hot class encoding
+ t = torch.full_like(ps[:, 5:], 0.0, device=device) # targets
+ t[range(num_targets), tcls[layer_index]] = 1
+ # Use the tensor to calculate the BCE loss
+ lcls += BCEcls(ps[:, 5:], t) # BCE
+
+ # Classification of the objectness the sequel
+ # Calculate the BCE loss between the on the fly generated target and the network prediction
+ obji = BCEobj(layer_predictions[..., 4], tobj) # obj loss
+ lobj += obji * balance[layer_index]
+
+ lbox *= 0.05
+ lobj *= (1.0 * ((416 / 640) ** 2)) # scale to image size
+ lcls *= (0.5 * (nc / 80)) # scale to classes
+
+ # Merge losses
+ # NOTE(review): tobj here is the tensor left over from the LAST loop iteration;
+ # its dim 0 is the batch size, so this scales the loss by batch size. Assumes
+ # predictions is non-empty, otherwise tobj is undefined — confirm.
+ loss = (lbox + lobj + lcls) * tobj.shape[0]
+
+ return loss, (torch.cat((lbox, lobj, lcls))).detach()
+
+
+def build_targets(p, targets, loss_layers, device="cpu"):
+ # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
+ # Returns per-layer lists: tcls (class ids), tbox (cell-local x, y offsets plus
+ # grid-space w, h), indices ((image, anchor, grid_j, grid_i) tuples), and
+ # anch (the anchor matched to each target).
+ na, nt = len(loss_layers[0].anchors), targets.shape[0] # number of anchors, targets
+ tcls, tbox, indices, anch = [], [], [], []
+ gain = torch.ones(7, device=device) # normalized to gridspace gain
+ # Make a tensor that iterates 0-2 for 3 anchors and repeat that as many times as we have target boxes
+ ai = torch.arange(na, device=device).float().view(na, 1).repeat(1, nt)
+ # Copy target boxes anchor size times and append an anchor index to each copy the anchor index is also expressed by the new first dimension
+ targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)
+
+ g = 0.5
+ off = torch.tensor([
+ [0, 0], [1, 0], [0, 1],
+ [-1, 0], [0, -1]
+ ], device=device).float() * g #offsets
+
+ for i, yolo_layer in enumerate(loss_layers):
+ # Scale anchors by the yolo grid cell size so that an anchor with the size of the cell would result in 1
+ anchors = yolo_layer.anchors / yolo_layer.stride
+ # Add the number of yolo cells in this layer to the gain tensor
+ # The gain tensor matches the columns of our targets (img id, class, x, y, w, h, anchor id)
+ gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain
+ # Scale targets by the number of yolo layer cells, they are now in the yolo cell coordinate system
+ t = targets * gain
+ # Check if we have targets
+ if nt:
+ # Calculate the ratio between anchor and target box for both width and height
+ r = t[:, :, 4:6] / anchors[:, None]
+ # Select the ratios that have the highest divergence in any axis and check if the ratio is less than 4
+ j = torch.max(r, 1.0 / r).max(2)[0] < 4.0 # compare
+ # Only use targets that have the correct ratios for their anchors
+ # That means we only keep ones that have a matching anchor and we lose the anchor dimension
+ # The anchor id is still saved in the 7th value of each target
+ t = t[j]
+
+ #offsets
+ # A target whose centre lies within g of a cell border is duplicated into
+ # that neighbouring cell as an extra positive sample (off lists the 4
+ # candidate shifts plus the identity).
+ gxy = t[:, 2:4] #grid xy
+ gxi = gain[[2,3]] - gxy
+ j, k = ((gxy % 1 < g) & (gxy > 1)).T
+ l, m = ((gxi % 1 < g) & (gxi > 1)).T
+ j = torch.stack((torch.ones_like(j), j, k, l, m))
+ t = t.repeat((5, 1, 1))[j]
+ offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
+ else:
+ t = targets[0]
+ offsets = 0
+
+ # Extract image id in batch and class id
+ b, c = t[:, :2].long().T
+ # We isolate the target cell associations.
+ # x, y, w, h are already in the cell coordinate system meaning an x = 1.2 would be 1.2 times cellwidth
+ gxy = t[:, 2:4] #grid xy
+ gwh = t[:, 4:6] # grid wh
+ # Cast to int to get an cell index e.g. 1.2 gets associated to cell 1
+ gij = (gxy - offsets).long()
+ # Isolate x and y index dimensions
+ gi, gj = gij.T # grid xy indices
+
+ # Convert anchor indexes to int
+ a = t[:, 6].long()
+ # Add target tensors for this yolo layer to the output lists
+ # Add to index list and limit index range to prevent out of bounds
+ indices.append((b, a, gj.clamp_(0, int(gain[3] - 1)), gi.clamp_(0, int(gain[2] - 1))))
+ # Add to target box list and convert box coordinates from global grid coordinates to local offsets in the grid cell
+ tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
+ # Add correct anchor for each target to the list
+ anch.append(anchors[a])
+ # Add class for each target to the list
+ tcls.append(c)
+
+ return tcls, tbox, indices, anch
diff --git a/imageai/Detection/Custom/yolo/custom_anchors.py b/imageai/Detection/Custom/yolo/custom_anchors.py
new file mode 100644
index 00000000..219b2130
--- /dev/null
+++ b/imageai/Detection/Custom/yolo/custom_anchors.py
@@ -0,0 +1,79 @@
+import random
+
+import torch
+import numpy as np
+from scipy.cluster.vq import kmeans
+
+# This new anchor generator function is based on https://github.com/ultralytics/yolov3/blob/master/utils/autoanchor.py
+
+def generate_anchors(dataset, n=9, img_size=416, thr=4.0, gen=1000, verbose=True):
+ """ Creates kmeans-evolved anchors from training dataset
+
+ Arguments:
+ dataset: a loaded dataset i.e. subclass of torch.utils.data.Dataset;
+ must expose .shapes (an (N, 2) array of original image sizes) and
+ .labels (a list of (m, 5) normalized label arrays), as provided by
+ LoadImagesAndLabels
+ n: number of anchors
+ img_size: image size used for training
+ thr: anchor-label wh ratio threshold used for training, default=4.0
+ gen: generations to evolve anchors using genetic algorithm
+ verbose: print all results
+
+ Return:
+ k: kmeans evolved anchors
+ """
+ # invert once so comparisons below are "ratio metric > thr"
+ thr = 1 / thr
+
+ def metric(k, wh): # compute metrics
+ r = wh[:, None] / k[None]
+ x = torch.min(r, 1 / r).min(2)[0] # ratio metric
+ return x, x.max(1)[0] # x, best_x
+
+ def anchor_fitness(k): # mutation fitness
+ _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
+ return (best * (best > thr).float()).mean() # fitness
+
+ def print_results(k, verbose=True):
+ k = k[np.argsort(k.prod(1))] # sort small to large
+ if verbose:
+ x, best = metric(k, wh0)
+ bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
+ s = f'thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \
+ f'n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \
+ f'past_thr={x[x > thr].mean():.3f}-mean: '
+ print(s)
+ return k
+
+ # Get label wh
+ # Rescale original image shapes so the longest side equals img_size, then
+ # express every label's w, h in those rescaled pixels.
+ shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
+ wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
+
+ # Filter
+ i = (wh0 < 3.0).any(1).sum()
+ if i and verbose:
+ print(f'WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
+ wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels
+ # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
+
+ # Kmeans calculation
+ s = wh.std(0) # sigmas for whitening
+ k, dist = kmeans(wh / s, n, iter=30) # points, mean distance
+ assert len(k) == n, f'ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}'
+ k *= s
+ wh = torch.tensor(wh, dtype=torch.float32) # filtered
+ wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered
+ k = print_results(k, verbose=False)
+
+ # Evolve
+ # Genetic refinement: randomly mutate the kmeans anchors and keep any
+ # mutation that improves anchor_fitness.
+ npr = np.random
+ f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, anchor shape, mutation prob, sigma
+ if verbose:
+ print("Generating anchor boxes for training images...")
+ for _ in range(gen):
+ v = np.ones(sh)
+ while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
+ v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
+ kg = (k.copy() * v).clip(min=2.0)
+ fg = anchor_fitness(kg)
+ if fg > f:
+ f, k = fg, kg.copy()
+
+ return print_results(k)
diff --git a/imageai/Detection/Custom/yolo/dataset.py b/imageai/Detection/Custom/yolo/dataset.py
new file mode 100644
index 00000000..2d3f1062
--- /dev/null
+++ b/imageai/Detection/Custom/yolo/dataset.py
@@ -0,0 +1,132 @@
+import os
+import warnings
+from typing import Tuple, List
+
+import cv2 as cv
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+from torchvision import transforms
+
+from ....yolov3.utils import prepare_image
+
+class LoadImagesAndLabels(Dataset):
+ """
+ Dataset of images and YOLO-format labels laid out as
+ <path>/{train|validation}/images and <path>/{train|validation}/annotations,
+ with one .txt per image containing rows: class cx cy w h (normalized 0-1).
+
+ Exposes .shapes (an (N, 2) array of original (width, height)) and .labels
+ (a list of (m, 5) arrays), the attributes read by generate_anchors().
+ """
+
+ def __init__(self, path : str, net_dim=(416, 416), train=True):
+ # path: dataset root; net_dim: (width, height) the network expects;
+ # train: selects the "train" or "validation" subdirectory.
+ if not os.path.isdir(path):
+ raise NotADirectoryError("path is not a valid directory!!!")
+
+ super().__init__()
+
+ if train:
+ path = os.path.join(path, "train")
+ else:
+ path = os.path.join(path, "validation")
+
+ self.__net_width, self.__net_height = net_dim
+ self.__images_paths = []
+ self.shapes = []
+ self.labels = []
+ # Keep only files cv.imread can decode (it returns None otherwise).
+ for img in os.listdir(os.path.join(path, "images")):
+ p = os.path.join(path, "images", img)
+ image = cv.imread(p)
+ if isinstance(image, np.ndarray):
+ l_p = self.__img_path2label_path(p)
+ self.__images_paths.append(p)
+ self.shapes.append((image.shape[1], image.shape[0]))
+ self.labels.append(self.__load_raw_label(l_p))
+
+ self.__nsamples = len(self.__images_paths)
+ self.shapes = np.array(self.shapes)
+
+ def __len__(self) -> int:
+ return self.__nsamples
+
+ def __img_path2label_path(self, path : str) -> str:
+ # Map .../images/foo.jpg -> .../annotations/foo.txt (last "images"
+ # component only, and only the final extension is replaced).
+ im, lb = os.sep+"images"+os.sep, os.sep+"annotations"+os.sep
+ return lb.join(path.rsplit(im, 1)).rsplit(".", 1)[0] + ".txt"
+
+ def __getitem__(self, idx) -> Tuple[torch.Tensor, torch.Tensor]:
+ # Returns (image tensor resized to net_dim, label tensor of shape (m, 6)
+ # with columns (batch_slot, class, cx, cy, w, h) — batch_slot is filled
+ # in by collate_fn).
+ if idx >= self.__nsamples:
+ raise IndexError("Index out of range.")
+ image_path = self.__images_paths[idx]
+ label = self.labels[idx].copy()
+ image, label = self.__load_data(image_path, label)
+ return image, label
+
+ def __xywhn2xyxy(self, nlabel : torch.Tensor, width : int, height : int) -> torch.Tensor:
+ """
+ Transformed label from normalized center_x, center_y, width, height to
+ x_1, y_1, x_2, y_2
+ """
+ label = nlabel.clone()
+ label[:, 1] = (nlabel[:, 1] - (nlabel[:, 3] / 2)) * width
+ label[:, 2] = (nlabel[:, 2] - (nlabel[:, 4] / 2)) * height
+ label[:, 3] = (nlabel[:, 1] + (nlabel[:, 3] / 2)) * width
+ label[:, 4] = (nlabel[:, 2] + (nlabel[:, 4] / 2)) * height
+
+ return label
+
+ def __load_data(self, img_path : str, label : np.ndarray) -> Tuple[torch.Tensor, torch.Tensor]:
+ # Load one image, letterbox it to net_dim via prepare_image, and rescale
+ # its labels to match.
+ img = cv.imread(img_path)
+ img_h, img_w = img.shape[:2]
+ img = prepare_image(img[:, :, :3], [self.__net_width, self.__net_height])
+ lab = self.__process_label(label, img_w, img_h)
+ return img.squeeze(), lab
+
+ def __load_raw_label(self, label_path : str):
+ # Missing label file -> empty (0, 5) array (image with no objects).
+ if os.path.isfile(label_path):
+ with warnings.catch_warnings():
+ l = np.loadtxt(label_path).reshape(-1,5)
+ assert (l >= 0).all(), "bounding box values should be positive and in range 0 - 1"
+ assert (l[:, 1:] <= 1).all(), "bounding box values should be in the range 0 - 1"
+ else:
+ l = np.zeros((0,5), dtype=np.float32)
+ return l
+
+ def __process_label(self, label : np.ndarray, image_width : int, image_height : int) -> torch.Tensor:
+ """
+ Process corresponding label and resize the ground truth bounding boxes
+ to match the dimension of the resized image.
+ """
+ #max_box = 50
+ # NOTE(review): both terms use __net_width (square-net assumption); the
+ # second presumably wants __net_height for non-square net_dim — confirm.
+ scaling_factor = min(
+ self.__net_width/image_width,
+ self.__net_width/image_height
+ )
+ #bs = torch.zeros((max_box, 6))
+ bs = torch.zeros((len(label), 6))
+ if label.size > 0:
+ nlabels = torch.from_numpy(label)
+ labels = self.__xywhn2xyxy(nlabels, image_width, image_height)
+ # scale bounding box to match new image size
+ labels[:, [1,3]] = ((labels[:, [1,3]] * scaling_factor) +\
+ (self.__net_width - (image_width * scaling_factor))/2)
+ labels[:, [2,4]] = ((labels[:, [2,4]] * scaling_factor) +\
+ (self.__net_width - (image_height * scaling_factor))/2)
+
+ # convert x1, y1, x2, y2 to center_x, center_y, width, height
+ label_copy = labels.clone()
+ labels[:, 1] = (label_copy[:, 3] + label_copy[:, 1])/2
+ labels[:, 2] = (label_copy[:, 4] + label_copy[:, 2])/2
+ labels[:, 3] = (label_copy[:, 3] - label_copy[:, 1])
+ labels[:, 4] = (label_copy[:, 4] - label_copy[:, 2])
+
+
+ # scale labels by new image dimension
+ labels[:, 1:5] /= self.__net_width
+ bs[:, 1:] = labels[:, :]
+ return bs
+
+ def collate_fn(self, batch) -> Tuple[torch.Tensor, torch.Tensor]:
+ # Stack images into one tensor and concatenate labels, writing the
+ # within-batch image index into column 0 of every label row.
+ batch = [data for data in batch if data is not None]
+ imgs, bboxes = list(zip(*batch))
+
+ imgs = torch.stack(imgs)
+
+ for i, boxes in enumerate(bboxes):
+ boxes[:, 0] = i
+ bboxes = torch.cat(bboxes, 0)
+
+ return imgs, bboxes
+
diff --git a/imageai/Detection/Custom/yolo/metric.py b/imageai/Detection/Custom/yolo/metric.py
new file mode 100644
index 00000000..b61b9449
--- /dev/null
+++ b/imageai/Detection/Custom/yolo/metric.py
@@ -0,0 +1,84 @@
+import math
+import warnings
+
+import numpy as np
+import torch
+
+# These new metric functions are based on https://github.com/ultralytics/yolov3/blob/master/utils/metrics.py
+
+def ap_per_class(tp, conf, pred_cls, target_cls):
+ """ Compute the average precision, given the recall and precision curves.
+ Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
+ # Arguments
+ tp: True positives (nparray, nx1 or nx10).
+ conf: Objectness value from 0-1 (nparray).
+ pred_cls: Predicted object classes (nparray).
+ target_cls: True object classes (nparray).
+ # Returns
+ The average precision as computed in py-faster-rcnn.
+ (p, r, ap, f1, unique_classes) — p, r and f1 are reported at the
+ confidence that maximizes the mean F1; ap has one column per IoU
+ threshold column of tp.
+ """
+
+ # Sort by objectness
+ i = np.argsort(-conf)
+ tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
+
+ # Find unique classes
+ unique_classes = np.unique(target_cls)
+ nc = unique_classes.shape[0] # number of classes, number of detections
+
+ # Create Precision-Recall curve and compute AP for each class
+ # px is a 1000-point confidence grid used to sample the P and R curves.
+ px = np.linspace(0, 1, 1000)
+ ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
+ for ci, c in enumerate(unique_classes):
+ i = pred_cls == c
+ n_l = (target_cls == c).sum() # number of labels
+ n_p = i.sum() # number of predictions
+
+ if n_p == 0 or n_l == 0:
+ continue
+ else:
+ # Accumulate FPs and TPs
+ fpc = (1 - tp[i]).cumsum(0)
+ tpc = tp[i].cumsum(0)
+
+ # Recall
+ recall = tpc / (n_l + 1e-16) # recall curve
+ r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases
+
+ # Precision
+ precision = tpc / (tpc + fpc) # precision curve
+ p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
+
+ # AP from recall-precision curve
+ for j in range(tp.shape[1]):
+ ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
+
+ # Compute F1 (harmonic mean of precision and recall)
+ f1 = 2 * p * r / (p + r + 1e-16)
+ i = f1.mean(0).argmax() # max F1 index
+
+ return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32')
+
+
+def compute_ap(recall, precision):
+ """ Compute the average precision, given the recall and precision curves
+ # Arguments
+ recall: The recall curve (list)
+ precision: The precision curve (list)
+ # Returns
+ Average precision, precision curve, recall curve
+ """
+
+ # Append sentinel values to beginning and end
+ mrec = np.concatenate(([0.0], recall, [1.0]))
+ mpre = np.concatenate(([1.0], precision, [0.0]))
+
+ # Compute the precision envelope
+ # (makes precision monotonically non-increasing from left to right)
+ mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
+
+ # Integrate area under curve
+ x = np.linspace(0, 1, 101) # 101-point interp (COCO)
+ ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
+
+ return ap, mpre, mrec
+
diff --git a/imageai/Detection/Custom/yolo/validate.py b/imageai/Detection/Custom/yolo/validate.py
new file mode 100644
index 00000000..92416ac3
--- /dev/null
+++ b/imageai/Detection/Custom/yolo/validate.py
@@ -0,0 +1,116 @@
+import os
+
+import numpy as np
+import torch
+from torchvision.ops import box_iou
+
+from ....yolov3.utils import get_predictions
+from .metric import ap_per_class
+from tqdm import tqdm
+
+# This new validation function is based on https://github.com/ultralytics/yolov3/blob/master/val.py
+
+
+def xywh2xyxy(box_coord : torch.Tensor):
+ """
+ Convert bounding box coordinates from center_x, center_y, width, height
+ to x_1, y_1, x_2, y_2
+ """
+ n = box_coord.clone()
+ n[:, 0] = (box_coord[:, 0] - (box_coord[:, 2] / 2))
+ n[:, 1] = (box_coord[:, 1] - (box_coord[:, 3] / 2))
+ n[:, 2] = (box_coord[:, 0] + (box_coord[:, 2] / 2))
+ n[:, 3] = (box_coord[:, 1] + (box_coord[:, 3] / 2))
+
+ return n
+
+def process_batch(detections, labels, iouv):
+ """
+ Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+ Arguments:
+ detections (Array[N, >=8]): columns read here are 1:5 (x1, y1, x2, y2,
+ clamped in place to the net frame) and 7 (class id); column 0 is the
+ image index within the batch and column 5 the confidence (see run()).
+ labels (Array[M, 5]), class, x1, y1, x2, y2
+ Returns:
+ correct (Array[N, len(iouv)]) bool: detection i is a true positive at
+ IoU threshold j (each label matched to at most one detection).
+ """
+ # NOTE(review): clamp bound hard-codes a 416px net input — confirm vs net_dim.
+ detections[:, [1,3]] = torch.clamp(detections[:, [1,3]], 0.0, 416)
+ detections[:, [2,4]] = torch.clamp(detections[:, [2,4]], 0.0, 416)
+
+ correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
+ iou = box_iou(labels[:, 1:], detections[:, 1:5])
+ x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 7])) # IoU above threshold and classes match
+ if x[0].shape[0]:
+ matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detection, iou]
+ if x[0].shape[0] > 1:
+ # Sort by IoU, then drop duplicate detections and duplicate labels so
+ # each is used at most once (greedy best-IoU matching).
+ matches = matches[matches[:, 2].argsort()[::-1]]
+ matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+ matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+ matches = torch.Tensor(matches).to(iouv.device)
+ correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
+ return correct
+
+@torch.no_grad()
+def run(model, val_dataloader, num_class, net_dim=416, nms_thresh=0.6, objectness_thresh=0.001, device="cpu"):
+ # Validate `model` over `val_dataloader`, computing precision/recall/mAP with
+ # the COCO-style IoU sweep 0.5:0.95. Returns (mean precision, mean recall,
+ # mAP@0.5, mAP@0.5:0.95) averaged over classes; all stay 0.0 when no
+ # statistics were collected. (`map` shadows the builtin — local only.)
+ model.eval()
+ nc = int(num_class) # number of classes
+ iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
+ niou = iouv.numel()
+
+ p, r, f1, mp, mr, map50, map = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+ stats, ap, ap_class = [], [], []
+
+ for batch_i, (im, targets) in tqdm(enumerate(val_dataloader)):
+ im = im.to(device)
+ targets = targets.to(device)
+ nb = im.shape[0] # batch
+
+ # Inference
+ out = model(im) # inference
+
+ # NMS
+ targets[:, 2:] *= torch.Tensor([net_dim, net_dim, net_dim, net_dim]).to(device) # to pixels
+ out = get_predictions(
+ pred=out.to(device), num_classes=nc,
+ objectness_confidence=objectness_thresh,
+ nms_confidence_level=nms_thresh, device=device
+ )
+
+ # Metrics
+ for si in range(nb):
+ # targets column 0 / predictions column 0 hold the image index in batch
+ labels = targets[targets[:, 0] == si, 1:]
+ pred = out[out[:, 0]==si, :] if isinstance(out, torch.Tensor) else torch.zeros((0,0), device=device)
+ nl = len(labels)
+ tcls = labels[:, 0].tolist() if nl else [] # target class
+
+ if len(pred) == 0:
+ if nl:
+ # NOTE(review): torch.Tensor(device="cpu") uses the legacy
+ # constructor — torch.zeros(0) would be the safer spelling; verify.
+ stats.append((torch.zeros(0, niou, dtype=torch.bool, device="cpu"), torch.Tensor(device="cpu"), torch.Tensor(device="cpu"), tcls))
+ continue
+
+ # Predictions
+ if nc==1:
+ pred[:, 7] = 0
+
+ if pred.shape[0] > 300:
+ pred = pred[:300, :] # sorted by confidence
+
+ predn = pred.clone()
+
+ # Evaluate
+ if nl:
+ tbox = xywh2xyxy(labels[:, 1:5]).to(device) # target boxes
+ labelsn = torch.cat((labels[:, 0:1], tbox), 1).to(device) # native-space labels
+ correct = process_batch(predn, labelsn, iouv)
+ else:
+ correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
+ stats.append((correct.cpu(), pred[:, 5].cpu(), pred[:, 7].cpu(), tcls)) # (correct, conf, pcls, tcls)
+
+ # Compute metrics
+ stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
+ if len(stats) and stats[0].any():
+ p, r, ap, f1, ap_class = ap_per_class(*stats)
+ ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95
+ mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
+
+ return mp, mr, map50, map
+
diff --git a/imageai/Detection/README.md b/imageai/Detection/README.md
index c6b592b7..07f489f4 100644
--- a/imageai/Detection/README.md
+++ b/imageai/Detection/README.md
@@ -1,56 +1,83 @@
-# ImageAI : Object Detection
-
- - RetinaNet(Size = 145 mb, high performance and accuracy, with longer detection time)
-
-- YOLOv3(Size = 237 mb, moderate performance and accuracy, with a moderate detection time)
-
-- TinyYOLOv3(Size = 34 mb, optimized for speed and moderate performance, with fast detection time)
+# ImageAI : Object Detection
+
+## ---------------------------------------------------
+## Introducing Jarvis and TheiaEngine.
+
+We the creators of ImageAI are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers.
+
+
+[](https://jarvis.genxr.co)
+
+Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.
+
+
+Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.
+
+
+[](https://www.genxr.co/theia-engine)
+
+
+[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
+- **Detect 300+ objects** ( 220 more objects than ImageAI)
+- **Provide answers to any content or context questions** asked on an image
+ - very useful for getting information on any object or action without needing to train a new custom model for every task
+- **Generate scene description and summary**
+- **Convert 2D image to 3D pointcloud and triangular mesh**
+- **Semantic Scene mapping of objects, walls, floors, etc**
+- **Stateless Face recognition and emotion detection**
+- **Image generation and augmentation from prompt**
+- etc.
+
+Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
+## ---------------------------------------------------
+
+### TABLE OF CONTENTS
+
+- :white_square_button: First Object Detection
+- :white_square_button: Object Detection, Extraction and Fine-tune
+- :white_square_button: Custom Object Detection
+- :white_square_button: Detection Speed
+- :white_square_button: Hiding/Showing Object Name and Probability
+- :white_square_button: Image Input & Output Types
+- :white_square_button: Documentation
+
+
+ImageAI provides very convenient and powerful methods to perform object detection on images and extract each object from the image. The object detection class supports RetinaNet, YOLOv3 and TinyYOLOv3. To start performing object detection, you must download the RetinaNet, YOLOv3 or TinyYOLOv3 object detection model via the links below:
+* **[RetinaNet](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth)** _(Size = 130 mb, high performance and accuracy, with longer detection time)_
+* **[YOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt)** _(Size = 237 mb, moderate performance and accuracy, with a moderate detection time)_
+* **[TinyYOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny-yolov3.pt)** _(Size = 34 mb, optimized for speed and moderate performance, with fast detection time)_
+
+
Once you download the object detection model file, you should copy the model file to the your project folder where your .py files will be.
- Then create a python file and give it a name; an example is FirstObjectDetection.py. Then write the code below into the python file:
+ Then create a python file and give it a name; an example is FirstObjectDetection.py. Then write the code below into the python file:
+### FirstObjectDetection.py
-
-
-
Let us make a breakdown of the object detection code that we used above.
-
+```python
from imageai.Detection import ObjectDetection
import os
execution_path = os.getcwd()
-
- In the 3 lines above , we import the ImageAI object detection class in the first line, import the os in the second line and obtained
- the path to folder where our python file runs.
-
+```
+
+ In the 3 lines above , we import the **ImageAI object detection** class in the first line, import the `os` in the second line and obtained the path to folder where our python file runs.
+
+```python
detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
-detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.setModelPath( os.path.join(execution_path , "yolov3.pt"))
detector.loadModel()
-
- In the 4 lines above, we created a new instance of the ObjectDetection class in the first line, set the model type to YOLOv3 in the second line,
- set the model path to the YOLOv3 model file we downloaded and copied to the python file folder in the third line and load the model in the
- fourth line.
+```
+
+In the 4 lines above, we created a new instance of the `ObjectDetection` class in the first line, set the model type to YOLOv3 in the second line, set the model path to the YOLOv3 model file we downloaded and copied to the python file folder in the third line and load the model in the fourth line.
-
+```
+
+In the 2 lines above, we ran the `detectObjectsFromImage()` function and passed in the path to our image, and the path to the new image which the function will save. Then the function returns an array of dictionaries with each dictionary corresponding to the number of objects detected in the image. Each dictionary has the properties `name` (name of the object), `percentage_probability` (percentage probability of the detection) and `box_points` (the x1,y1,x2 and y2 coordinates of the bounding box of the object).
-In the 2 lines above, we ran the detectObjectsFromImage() function and parse in the path to our image, and the path to the new
- image which the function will save. Then the function returns an array of dictionaries with each dictionary corresponding
- to the number of objects detected in the image. Each dictionary has the properties name (name of the object),
-percentage_probability (percentage probability of the detection) and box_points ( the x1,y1,x2 and y2 coordinates of the bounding box of the object).
+Should you want to use the RetinaNet which is appropriate for high-performance and high-accuracy demanding detection tasks, you will download the RetinaNet model file from the links above, copy it to your python file's folder, set the model type and model path in your python code as seen below:
-Should you want to use the RetinaNet which is appropriate for high-performance and high-accuracy demanding detection tasks, you will download the RetinaNet model file from the links above, copy it to your python file's folder, set the model type and model path in your python code as seen below:
-
+```
-However, if you desire TinyYOLOv3 which is optimized for speed and embedded devices, you will download the TinyYOLOv3 model file from the links above, copy it to your python file's folder, set the model type and model path in your python code as seen below:
-
+However, if you desire TinyYOLOv3 which is optimized for speed and embedded devices, you will download the TinyYOLOv3 model file from the links above, copy it to your python file's folder, set the model type and model path in your python code as seen below:
+
+```python
detector = ObjectDetection()
detector.setModelTypeAsTinyYOLOv3()
-detector.setModelPath( os.path.join(execution_path , "yolo-tiny.h5"))
+detector.setModelPath( os.path.join(execution_path , "tiny-yolov3.pt"))
detector.loadModel()
-
-
-
-
+```
+## Object Detection, Extraction and Fine-tune
-
>> Object Detection, Extraction and Fine-tune
-In the examples we used above, we ran the object detection on an image and it
-returned the detected objects in an array as well as save a new image with rectangular markers drawn
- on each object. In our next examples, we will be able to extract each object from the input image
+In the examples we used above, we ran the object detection on an image and it returned the detected objects in an array as well as save a new image with rectangular markers drawn on each object. In our next examples, we will be able to extract each object from the input image
and save it independently.
-
-
- In the example code below which is very identical to the previous object detction code, we will save each object
- detected as a seperate image.
-
from imageai.Detection import ObjectDetection
+In the example code below, which is identical to the previous object detection code, we will save each object detected as a separate image.
+
+```python
+from imageai.Detection import ObjectDetection
import os
execution_path = os.getcwd()
detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
-detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.setModelPath( os.path.join(execution_path , "yolov3.pt"))
detector.loadModel()
detections, objects_path = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new.jpg"), minimum_percentage_probability=30, extract_detected_objects=True)
@@ -159,116 +177,69 @@ for eachObject, eachObjectPath in zip(detections, objects_path):
print(eachObject["name"] , " : " , eachObject["percentage_probability"], " : ", eachObject["box_points"] )
print("Object's image saved in " + eachObjectPath)
print("--------------------------------")
+```
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
Sample Result:
-
-
-
Input Image
-
-
Output Images
-
-
-
dog
-
-
-
-
motorcycle
-
-
-
-
car
-
-
-
-
bicycle
-
-
-
-
person
-
-
-
-
person
-
-
-
-
person
-
-
-
person
-
-
-
-
person
-
-
-
-
-
Let us review the part of the code that perform the object detection and extract the images:
-
-
-In the above above lines, we called the detectObjectsFromImage() , parse in the input image path, output image part, and an
-extra parameter extract_detected_objects=True. This parameter states that the function should extract each object detected from the image
-and save it has a seperate image. The parameter is false by default. Once set to true, the function will create a directory
- which is the output image path + "-objects" . Then it saves all the extracted images into this new directory with
- each image's name being the detected object name + "-" + a number which corresponds to the order at which the objects
- were detected.
-
-This new parameter we set to extract and save detected objects as an image will make the function to return 2 values. The
- first is the array of dictionaries with each dictionary corresponding to a detected object. The second is an array of the paths
- to the saved images of each object detected and extracted, and they are arranged in order at which the objects are in the
- first array.
-
-
-
And one important feature you need to know!
You will recall that the percentage probability
- for each detected object is sent back by the detectObjectsFromImage() function. The function has a parameter
- minimum_percentage_probability , whose default value is 50 (value ranges between 0 - 100) , but it set to 30 in this example. That means the function will only return a detected
- object if it's percentage probability is 30 or above. The value was kept at this number to ensure the integrity of the
- detection results. You fine-tune the object
- detection by setting minimum_percentage_probability equal to a smaller value to detect more number of objects or higher value to detect less number of objects.
-
-
+```
+
+In the above lines, we called the `detectObjectsFromImage()`, passed in the input image path, output image path, and an extra parameter `extract_detected_objects=True`. This parameter states that the function should extract each object detected from the image and save it as a separate image. The parameter is false by default. Once set to `true`, the function will create a directory which is the **output image path + "-objects"** . Then it saves all the extracted images into this new directory with each image's name being the **detected object name + "-" + a number** which corresponds to the order at which the objects were detected.
+
+This new parameter we set to extract and save detected objects as an image will make the function to return 2 values. The first is the array of dictionaries with each dictionary corresponding to a detected object. The second is an array of the paths to the saved images of each object detected and extracted, and they are arranged in order at which the objects are in the first array.
+
+
+**And one important feature you need to know!** You will recall that the percentage probability
+ for each detected object is sent back by the `detectObjectsFromImage()` function. The function has a parameter `minimum_percentage_probability`, whose default value is `50` (value ranges between 0 - 100), but it is set to 30 in this example. That means the function will only return a detected object if its percentage probability is **30 or above**. The value was kept at this number to ensure the integrity of the detection results. You can fine-tune the object detection by setting **minimum_percentage_probability** equal to a smaller value to detect more objects or a higher value to detect fewer objects.
+
+## Custom Object Detection
-
>> Custom Object Detection
-The object detection model (RetinaNet) supported by ImageAI can detect 80 different types of objects. They include:
-
-
-Interestingly, ImageAI allow you to perform detection for one or more of the items above. That means you can
- customize the type of object(s) you want to be detected in the image. Let's take a look at the code below:
-
-
from imageai.Detection import ObjectDetection
+
+The object detection model (**RetinaNet**) supported by **ImageAI** can detect 80 different types of objects. They include:
+```
+person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop_sign,
+parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,
+giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,
+sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket,
+bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,
+broccoli, carrot, hot dog, pizza, donut, cake, chair, couch, potted plant, bed,
+dining table, toilet, tv, laptop, mouse, remote, keyboard, cell phone, microwave, oven,
+toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair dryer, toothbrush.
+```
+
+Interestingly, **ImageAI** allows you to perform detection for one or more of the items above. That means you can
+ customize the type of object(s) you want to be detected in the image. Let's take a look at the code below:
+
+```python
+from imageai.Detection import ObjectDetection
import os
execution_path = os.getcwd()
detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
-detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.setModelPath( os.path.join(execution_path , "yolov3.pt"))
detector.loadModel()
custom_objects = detector.CustomObjects(car=True, motorcycle=True)
@@ -277,78 +248,66 @@ detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects
for eachObject in detections:
print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
print("--------------------------------")
+```
-
+
-
Result:
-
-
-
-
-
Let us take a look at the part of the code that made this possible.
-
In the above code, after loading the model (can be done before loading the model as well), we defined a new variable
-"custom_objects = detector.CustomObjects()", in which we set its car and motorcycle properties equal to True.
-This is to tell the model to detect only the object we set to True. Then we call the "detector.detectCustomObjectsFromImage()"
-which is the function that allows us to perform detection of custom objects. Then we will set the "custom_objects" value
+`custom_objects = detector.CustomObjects()`, in which we set its car and motorcycle properties equal to **True**.
+This is to tell the model to detect only the object we set to True. Then we call the `detector.detectCustomObjectsFromImage()`
+which is the function that allows us to perform detection of custom objects. Then we will set the `custom_objects` value
to the custom objects variable we defined.
-
-
-
>> Detection Speed
- ImageAI now provides detection speeds for all object detection tasks. The detection speeds allow you to reduce
- the time of detection at a rate between 20% - 80%, and yet having just slight changes but accurate detection
-results. Coupled with lowering the minimum_percentage_probability parameter, detections can match the normal
-speed and yet reduce detection time drastically. The available detection speeds are "normal"(default), "fast", "faster" , "fastest" and "flash".
-All you need to do is to state the speed mode you desire when loading the model as seen below.
-
-
detector.loadModel(detection_speed="fast")
-
-
+## Hiding/Showing Object Name and Probability
-
>> Hiding/Showing Object Name and Probability
-ImageAI provides options to hide the name of objects detected and/or the percentage probability from being shown on the saved/returned detected image. Using the detectObjectsFromImage() and detectCustomObjectsFromImage() functions, the parameters 'display_object_name' and 'display_percentage_probability' can be set to True of False individually. Take a look at the code below:
-
+**ImageAI** provides options to hide the name of objects detected and/or the percentage probability from being shown on the saved/returned detected image. Using the `detectObjectsFromImage()` and `detectCustomObjectsFromImage()` functions, the parameters `display_object_name` and `display_percentage_probability` can be set to True or False individually. Take a look at the code below:
- In the above code, we specified that both the object name and percentage probability should not be shown. As you can see in the result below, both the names of the objects and their individual percentage probability is not shown in the detected image.
-
Result
-
+```python
+detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new_nodetails.jpg"), minimum_percentage_probability=30, display_percentage_probability=False, display_object_name=False)
+```
+In the above code, we specified that both the object name and percentage probability should not be shown. As you can see in the result below, both the names of the objects and their individual percentage probability is not shown in the detected image.
-
-ImageAI supports 3 input types of inputs which are file path to image file(default), numpy array of image and image file stream
-as well as 2 types of output which are image file(default) and numpy array .
+
+**ImageAI** supports 3 types of inputs which are **file path to image file**(default), **numpy array of image** and **image file stream**
+as well as 2 types of output which are image **file**(default) and numpy **array**.
This means you can now perform object detection in production applications such as on a web server and system
that returns file in any of the above stated formats.
- To perform object detection with numpy array or file stream input, you just need to state the input type
-in the .detectObjectsFromImage() function or the .detectCustomObjectsFromImage() function. See example below.
-
detections = detector.detectObjectsFromImage(input_type="array", input_image=image_array , output_image_path=os.path.join(execution_path , "image.jpg")) # For numpy array input type
-detections = detector.detectObjectsFromImage(input_type="stream", input_image=image_stream , output_image_path=os.path.join(execution_path , "test2new.jpg")) # For file stream input type
To perform object detection with numpy array output you just need to state the output type
-in the .detectObjectsFromImage() function or the .detectCustomObjectsFromImage() function. See example below.
+To perform object detection with numpy array or file stream input, you just need to state the input type
+in the `.detectObjectsFromImage()` function or the `.detectCustomObjectsFromImage()` function. See example below.
+
+```python
+detections = detector.detectObjectsFromImage(input_type="array", input_image=image_array , output_image_path=os.path.join(execution_path , "image.jpg")) # For numpy array input type
+detections = detector.detectObjectsFromImage(input_type="stream", input_image=image_stream , output_image_path=os.path.join(execution_path , "test2new.jpg")) # For file stream input type
+```
-
detected_image_array, detections = detector.detectObjectsFromImage(output_type="array", input_image="image.jpg" ) # For numpy array output type
-
+To perform object detection with numpy array output you just need to state the output type
+in the `.detectObjectsFromImage()` function or the `.detectCustomObjectsFromImage()` function. See example below.
-
+```python
+detected_image_array, detections = detector.detectObjectsFromImage(output_type="array", input_image="image.jpg" ) # For numpy array output type
+```
+
+## Documentation
-
>> Documentation
-We have provided full documentation for all ImageAI classes and functions in 2 major languages. Find links below:
- >> Documentation - English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
- >> Documentation - Chinese Version [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)
+We have provided full documentation for all **ImageAI** classes and functions. Find links below:
+
+* Documentation - **English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
\ No newline at end of file
diff --git a/imageai/Detection/VIDEO.md b/imageai/Detection/VIDEO.md
index 96b8df9d..82d96d16 100644
--- a/imageai/Detection/VIDEO.md
+++ b/imageai/Detection/VIDEO.md
@@ -1,162 +1,190 @@
-# ImageAI : Video Object Detection, Tracking and Analysis
-
- - RetinaNet(Size = 145 mb, high performance and accuracy, with longer detection time)
-
-- YOLOv3(Size = 237 mb, moderate performance and accuracy, with a moderate detection time)
-
-- TinyYOLOv3(Size = 34 mb, optimized for speed and moderate performance, with fast detection time)
-
-Because video object detection is a compute intensive tasks, we advise you perform this experiment using a computer with a NVIDIA GPU and the GPU version of Tensorflow
- installed. Performing Video Object Detection CPU will be slower than using an NVIDIA GPU powered computer. You can use Google Colab for this
- experiment as it has an NVIDIA K80 GPU available.
-
+# ImageAI : Video Object Detection, Tracking and Analysis
+
+## ---------------------------------------------------
+## Introducing Jarvis and TheiaEngine.
+
+We the creators of ImageAI are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers.
+
+
+[](https://jarvis.genxr.co)
+
+Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.
+
+
+Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.
+
+
+[](https://www.genxr.co/theia-engine)
+
+
+[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
+- **Detect 300+ objects** ( 220 more objects than ImageAI)
+- **Provide answers to any content or context questions** asked on an image
+ - very useful to get information on any object, action or information without needing to train a new custom model for every tasks
+- **Generate scene description and summary**
+- **Convert 2D image to 3D pointcloud and triangular mesh**
+- **Semantic Scene mapping of objects, walls, floors, etc**
+- **Stateless Face recognition and emotion detection**
+- **Image generation and augmentation from prompt**
+- etc.
+
+Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
+## ---------------------------------------------------
+
+## TABLE OF CONTENTS
+
+- :white_square_button: First Video Object Detection
+- :white_square_button: Custom Video Object Detection (Object Tracking)
+- :white_square_button: Camera / Live Stream Video Detection
+- :white_square_button: Video Analysis
+- :white_square_button: Detection Speed
+- :white_square_button: Hiding/Showing Object Name and Probability
+- :white_square_button: Frame Detection Intervals
+- :white_square_button: Video Detection Timeout (NEW)
+- :white_square_button: Documentation
+
+ImageAI provides convenient, flexible and powerful methods to perform object detection on videos. The video object detection class provided only supports RetinaNet, YOLOv3 and TinyYOLOv3. This version of **ImageAI** provides commercial grade video objects detection features, which include but not limited to device/IP camera inputs, per frame, per second, per minute and entire video analysis for storing in databases and/or real-time visualizations and for future insights.
+
+To start performing video object detection, you must download the RetinaNet, YOLOv3 or TinyYOLOv3 object detection model via the links below:
+
+* **[RetinaNet](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth)** _(Size = 130 mb, high performance and accuracy, with longer detection time)_
+* **[YOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt)** _(Size = 237 mb, moderate performance and accuracy, with a moderate detection time)_
+* **[TinyYOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny-yolov3.pt)** _(Size = 34 mb, optimized for speed and moderate performance, with fast detection time)_
+
+Because video object detection is a compute intensive task, we advise you perform this experiment using a computer with a NVIDIA GPU and the GPU version of Tensorflow installed. Performing Video Object Detection on CPU will be slower than using an NVIDIA GPU powered computer. You can use Google Colab for this experiment as it has an NVIDIA K80 GPU available for free.
+
Once you download the object detection model file, you should copy the model file to the your project folder where your .py files will be.
- Then create a python file and give it a name; an example is FirstVideoObjectDetection.py. Then write the code below into the python file:
+ Then create a python file and give it a name; an example is `FirstVideoObjectDetection.py`. Then write the code below into the python file:
+### FirstVideoObjectDetection.py
-
-
-
+```
+
+
+Input Video (a 1min 24seconds video)
+
+[](https://github.com/OlafenwaMoses/ImageAI/blob/master/data-videos/traffic.mp4)
+
+Output Video
+[](https://www.youtube.com/embed/qplVDqOmElI?rel=0)
+
Let us make a breakdown of the object detection code that we used above.
-
+```python
from imageai.Detection import VideoObjectDetection
import os
execution_path = os.getcwd()
-
- In the 3 lines above , we import the ImageAI video object detection class in the first line, import the os in the second line and obtained
+```
+
+ In the 3 lines above, we import the **ImageAI video object detection** class in the first line, import the **os** in the second line and obtained
the path to folder where our python file runs.
-
- In the 4 lines above, we created a new instance of the VideoObjectDetection class in the first line, set the model type to RetinaNet in the second line,
- set the model path to the RetinaNet model file we downloaded and copied to the python file folder in the third line and load the model in the
- fourth line.
+```
+
+In the 4 lines above, we created a new instance of the **VideoObjectDetection** class in the first line, set the model type to RetinaNet in the second line, set the model path to the RetinaNet model file we downloaded and copied to the python file folder in the third line and load the model in the fourth line.
-
-
-In the 2 lines above, we ran the detectObjectsFromVideo() function and parse in the path to our video,the path to the new
- video (without the extension, it saves a .avi video by default) which the function will save, the number of frames per second (fps) that
- you we desire the output video to have and option to log the progress of the detection in the console. Then the function returns a the path to the saved video
- which contains boxes and percentage probabilities rendered on objects detected in the video.
+```
+In the 2 lines above, we ran the `detectObjectsFromVideo()` function and passed in the path to our video, the path to the new video (without the extension, it saves a .avi video by default) which the function will save, the number of frames per second (fps) that we desire the output video to have and the option to log the progress of the detection in the console. Then the function returns the path to the saved video which contains boxes and percentage probabilities rendered on objects detected in the video.
+### Custom Video Object Detection
-
Custom Video Object Detection
-The video object detection model (RetinaNet) supported by ImageAI can detect 80 different types of objects. They include:
-
-
-Interestingly, ImageAI allow you to perform detection for one or more of the items above. That means you can
- customize the type of object(s) you want to be detected in the video. Let's take a look at the code below:
-
-
from imageai.Detection import VideoObjectDetection
+
+The video object detection model (**RetinaNet**) supported by **ImageAI** can detect 80 different types of objects. They include:
+```
+ person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop_sign,
+ parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,
+ giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,
+ sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket,
+ bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,
+ broccoli, carrot, hot dog, pizza, donut, cake, chair, couch, potted plant, bed,
+ dining table, toilet, tv, laptop, mouse, remote, keyboard, cell phone, microwave,
+ oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair dryer,
+ toothbrush.
+```
+
+
+Interestingly, **ImageAI** allows you to perform detection for one or more of the items above. That means you can customize the type of object(s) you want to be detected in the video. Let's take a look at the code below:
+
+```python
+from imageai.Detection import VideoObjectDetection
import os
execution_path = os.getcwd()
detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
-detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
+detector.setModelPath( os.path.join(execution_path , "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
detector.loadModel()
custom_objects = detector.CustomObjects(person=True, bicycle=True, motorcycle=True)
-video_path = detector.detectCustomObjectsFromVideo(custom_objects=custom_objects, input_file_path=os.path.join(execution_path, "traffic.mp4"),
- output_file_path=os.path.join(execution_path, "traffic_custom_detected")
- , frames_per_second=20, log_progress=True)
+video_path = detector.detectCustomObjectsFromVideo(
+ custom_objects=custom_objects,
+ input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_custom_detected"),
+ frames_per_second=20, log_progress=True)
print(video_path)
-
-
+```
Let us take a look at the part of the code that made this possible.
-
+```python
+custom_objects = detector.CustomObjects(person=True, bicycle=True, motorcycle=True)
+
+video_path = detector.detectCustomObjectsFromVideo(
+ custom_objects=custom_objects,
+ input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_custom_detected"),
+ frames_per_second=20, log_progress=True)
+```
+
In the above code, after loading the model (can be done before loading the model as well), we defined a new variable
-"custom_objects = detector.CustomObjects()", in which we set its person, car and motorccyle properties equal to True.
-This is to tell the model to detect only the object we set to True. Then we call the "detector.detectCustomObjectsFromVideo()"
-which is the function that allows us to perform detection of custom objects. Then we will set the "custom_objects" value
+`custom_objects = detector.CustomObjects()`, in which we set its person, bicycle and motorcycle properties equal to **True**.
+This is to tell the model to detect only the object we set to True. Then we call the `detector.detectCustomObjectsFromVideo()`
+which is the function that allows us to perform detection of custom objects. Then we will set the `custom_objects` value
to the custom objects variable we defined.
-
-
+Output Video
+[](https://www.youtube.com/embed/YfAycAzkwPM?rel=0)
+C:\Users\User\PycharmProjects\ImageAITest\traffic_custom_detected.avi
-
+### Camera / Live Stream Video Detection
-
Camera / Live Stream Video Detection
-ImageAI now allows live-video detection with support for camera inputs. Using OpenCV's VideoCapture() function, you can load live-video streams from a device camera, cameras connected by cable or IP cameras, and parse it into ImageAI's detectObjectsFromVideo() and detectCustomObjectsFromVideo() functions. All features that are supported for detecting objects in a video file is also available for detecting objects in a camera's live-video feed. Find below an example of detecting live-video feed from the device camera.
-
+**ImageAI** now allows live-video detection with support for camera inputs. Using **OpenCV**'s `VideoCapture()` function, you can load live-video streams from a device camera, cameras connected by cable or IP cameras, and parse it into **ImageAI**'s `detectObjectsFromVideo()` and `detectCustomObjectsFromVideo()` functions. All features that are supported for detecting objects in a video file are also available for detecting objects in a camera's live-video feed. Find below an example of detecting live-video feed from the device camera.
+
+```python
from imageai.Detection import VideoObjectDetection
import os
import cv2
@@ -168,27 +196,28 @@ camera = cv2.VideoCapture(0)
detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
-detector.setModelPath(os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
+detector.setModelPath(os.path.join(execution_path , "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
detector.loadModel()
-video_path = detector.detectObjectsFromVideo(camera_input=camera,
- output_file_path=os.path.join(execution_path, "camera_detected_video")
- , frames_per_second=20, log_progress=True, minimum_percentage_probability=40)
-
+video_path = detector.detectObjectsFromVideo(
+ camera_input=camera,
+ output_file_path=os.path.join(execution_path, "camera_detected_video"),
+ frames_per_second=20, log_progress=True, minimum_percentage_probability=40)
+```
-The difference in the code above and the code for the detection of a video file is that we defined an OpenCV VideoCapture instance and loaded the default device camera into it. Then we parsed the camera we defined into the parameter camera_input which replaces the input_file_path that is used for video file.
+The difference in the code above and the code for the detection of a video file is that we defined an **OpenCV VideoCapture** instance and loaded the default device camera into it. Then we parsed the camera we defined into the parameter `camera_input` which replaces the `input_file_path` that is used for video file.
-
+### Video Analysis
-
Video Analysis
-ImageAI now provide commercial-grade video analysis in the Video Object Detection class, for both video file inputs and camera inputs. This feature allows developers to obtain deep insights into any video processed with ImageAI. This insights can be visualized in real-time, stored in a NoSQL database for future review or analysis.
+**ImageAI** now provides commercial-grade video analysis in the Video Object Detection class, for both video file inputs and camera inputs. This feature allows developers to obtain deep insights into any video processed with **ImageAI**. These insights can be visualized in real-time, stored in a NoSQL database for future review or analysis.
+
+For video analysis, the `detectObjectsFromVideo()` and `detectCustomObjectsFromVideo()` now allow you to state your own defined functions which will be executed for every frame, second and/or minute of the video detected, as well as state a function that will be executed at the end of a video detection. Once these functions are stated, they will receive raw but comprehensive analytical data on the index of the frame/second/minute, objects detected (name, percentage_probability and box_points), number of instances of each unique object detected and average number of occurrence of each unique object detected over a second/minute and entire video.
-For video analysis, the detectObjectsFromVideo() and detectCustomObjectsFromVideo() now allows you to state your own defined functions which will be executed for every frame, seconds and/or minute of the video detected as well as a state a function that will be executed at the end of a video detection. Once this functions are stated, they will receive raw but comprehensive analytical data on the index of the frame/second/minute, objects detected (name, percentage_probability and box_points), number of instances of each unique object detected and average number of occurrence of each unique object detected over a second/minute and entire video.
-To obtain the video analysis, all you need to do is specify a function, state the corresponding parameters it will be receiving and parse the function name into the per_frame_function, per_second_function, per_minute_function and video_complete_function parameters in the detection function. Find below examples of video analysis functions.
+To obtain the video analysis, all you need to do is specify a function, state the corresponding parameters it will be receiving and pass the function name into the `per_frame_function`, `per_second_function`, `per_minute_function` and `video_complete_function` parameters in the detection function. Find below examples of video analysis functions.
-
-
- When the detection starts on a video feed, be it from a video file or camera input, the result will have the format as below:
- *Results for the Frame function
-
+video_detector.detectObjectsFromVideo(
+ input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_detected"),
+ frames_per_second=10,
+ per_second_function=forSeconds,
+ per_frame_function=forFrame,
+ per_minute_function=forMinute,
+ minimum_percentage_probability=30
+)
+```
+
+When the detection starts on a video feed, be it from a video file or camera input, the result will have the format as below:
+
+**Results for the Frame function**
+```
FOR FRAME : 1
Output for each object : [{'box_points': (362, 295, 443, 355), 'name': 'boat', 'percentage_probability': 26.666194200515747}, {'box_points': (319, 245, 386, 296), 'name': 'boat', 'percentage_probability': 30.052968859672546}, {'box_points': (219, 308, 341, 358), 'name': 'boat', 'percentage_probability': 47.46982455253601}, {'box_points': (589, 198, 621, 241), 'name': 'bus', 'percentage_probability': 24.62330162525177}, {'box_points': (519, 181, 583, 263), 'name': 'bus', 'percentage_probability': 27.446213364601135}, {'box_points': (493, 197, 561, 272), 'name': 'bus', 'percentage_probability': 59.81815457344055}, {'box_points': (432, 187, 491, 240), 'name': 'bus', 'percentage_probability': 64.42965269088745}, {'box_points': (157, 225, 220, 255), 'name': 'car', 'percentage_probability': 21.150341629981995}, {'box_points': (324, 249, 377, 293), 'name': 'car', 'percentage_probability': 24.089913070201874}, {'box_points': (152, 275, 260, 327), 'name': 'car', 'percentage_probability': 30.341443419456482}, {'box_points': (433, 198, 485, 244), 'name': 'car', 'percentage_probability': 37.205660343170166}, {'box_points': (184, 226, 233, 260), 'name': 'car', 'percentage_probability': 38.52525353431702}, {'box_points': (3, 296, 134, 359), 'name': 'car', 'percentage_probability': 47.80363142490387}, {'box_points': (357, 302, 439, 359), 'name': 'car', 'percentage_probability': 47.94844686985016}, {'box_points': (481, 266, 546, 314), 'name': 'car', 'percentage_probability': 65.8585786819458}, {'box_points': (597, 269, 624, 318), 'name': 'person', 'percentage_probability': 27.125394344329834}]
@@ -228,15 +265,16 @@ Output for each object : [{'box_points': (362, 295, 443, 355), 'name': 'boat', '
Output count for unique objects : {'bus': 4, 'boat': 3, 'person': 1, 'car': 8}
------------END OF A FRAME --------------
-
+```
-For any function you parse into the per_frame_function, the function will be executed after every single video frame is processed and he following will be parsed into it:
- >> Frame Index : This is the position number of the frame inside the video (e.g 1 for first frame and 20 for twentieth frame).
- >> Output Array : This is an array of dictionaries. Each dictionary corresponds to each detected object in the image and it contains the "name", "percentage_probabaility" and "box_points"(x1,y1,x2,y2) values of the object.
- >> Output Count : This is a dictionary that has the name of each unique object detected as its keys and the number of instances of the objects detected as the values.
+For any function you parse into the **per_frame_function**, the function will be executed after every single video frame is processed and the following will be passed into it:
- *Results for the Second function
-
+* **Frame Index:** This is the position number of the frame inside the video (e.g 1 for first frame and 20 for twentieth frame).
+* **Output Array:** This is an array of dictionaries. Each dictionary corresponds to each detected object in the image and it contains the "name", "percentage_probability" and "box_points"(x1,y1,x2,y2) values of the object.
+* **Output Count:** This is a dictionary that has the name of each unique object detected as its keys and the number of instances of the objects detected as the values.
+
+**Results for the Second function**
+```
FOR SECOND : 1
Array for the outputs of each frame [[{'box_points': (362, 295, 443, 355), 'name': 'boat', 'percentage_probability': 26.666194200515747}, {'box_points': (319, 245, 386, 296), 'name': 'boat', 'percentage_probability': 30.052968859672546}, {'box_points': (219, 308, 341, 358), 'name': 'boat', 'percentage_probability': 47.46982455253601}, {'box_points': (589, 198, 621, 241), 'name': 'bus', 'percentage_probability': 24.62330162525177}, {'box_points': (519, 181, 583, 263), 'name': 'bus', 'percentage_probability': 27.446213364601135}, {'box_points': (493, 197, 561, 272), 'name': 'bus', 'percentage_probability': 59.81815457344055}, {'box_points': (432, 187, 491, 240), 'name': 'bus', 'percentage_probability': 64.42965269088745}, {'box_points': (157, 225, 220, 255), 'name': 'car', 'percentage_probability': 21.150341629981995}, {'box_points': (324, 249, 377, 293), 'name': 'car', 'percentage_probability': 24.089913070201874}, {'box_points': (152, 275, 260, 327), 'name': 'car', 'percentage_probability': 30.341443419456482}, {'box_points': (433, 198, 485, 244), 'name': 'car', 'percentage_probability': 37.205660343170166}, {'box_points': (184, 226, 233, 260), 'name': 'car', 'percentage_probability': 38.52525353431702}, {'box_points': (3, 296, 134, 359), 'name': 'car', 'percentage_probability': 47.80363142490387}, {'box_points': (357, 302, 439, 359), 'name': 'car', 'percentage_probability': 47.94844686985016}, {'box_points': (481, 266, 546, 314), 'name': 'car', 'percentage_probability': 65.8585786819458}, {'box_points': (597, 269, 624, 318), 'name': 'person', 'percentage_probability': 27.125394344329834}],
@@ -258,30 +296,37 @@ Array for output count for unique objects in each frame : [{'bus': 4, 'boat': 3,
Output average count for unique objects in the last second: {'truck': 0.5, 'bus': 3.7, 'umbrella': 0.8, 'boat': 1.3, 'person': 1.0, 'car': 6.6}
------------END OF A SECOND --------------
-
+```
+
+In the above result, the video was processed and saved in 10 frames per second (FPS). For any function you parse into the **per_second_function**, the function will be executed after every single second of the video that is processed and the following will be passed into it:
-In the above result, the video was processed and saved in 10 frames per second (FPS). For any function you parse into the per_second_function, the function will be executed after every single second of the video that is processed and he following will be parsed into it:
- >> Second Index : This is the position number of the second inside the video (e.g 1 for first second and 20 for twentieth second).
- >> Output Array : This is an array of arrays, with each contained array and its position (array index + 1) corresponding to the equivalent frame in the last second of the video (In the above example, their are 10 arrays which corresponds to the 10 frames contained in one second). Each contained array contains dictionaries. Each dictionary corresponds to each detected object in the image and it contains the "name", "percentage_probabaility" and "box_points"(x1,y1,x2,y2) values of the object.
- >> Count arrays : This is an array of dictionaries. Each dictionary and its position (array index + 1) corresponds to the equivalent frame in the last second of he video. Each dictionary has the name of each unique object detected as its keys and the number of instances of the objects detected as the values.
- >> Average Output Count : This is a dictionary that has the name of each unique object detected in the last second as its keys and the average number of instances of the objects detected across the number of frames as the values.
+- **Second Index:** This is the position number of the second inside the video (e.g 1 for first second and 20 for twentieth second).
+- **Output Array:** This is an array of arrays, with each contained array and its position (array index + 1) corresponding to the equivalent frame in the last second of the video (In the above example, there are 10 arrays which correspond to the 10 frames contained in one second). Each contained array contains dictionaries. Each dictionary corresponds to each detected object in the image and it contains the "name", "percentage_probability" and "box_points"(x1,y1,x2,y2) values of the object.
+- **Count arrays:** This is an array of dictionaries. Each dictionary and its position (array index + 1) corresponds to the equivalent frame in the last second of the video. Each dictionary has the name of each unique object detected as its keys and the number of instances of the objects detected as the values.
+- **Average Output Count:** This is a dictionary that has the name of each unique object detected in the last second as its keys and the average number of instances of the objects detected across the number of frames as the values.
- *Results for the Minute function
-The above set of 4 parameters that are returned for every second of the video processed is the same parameters to that will be returned for every minute of the video processed. The difference is that the index returned corresponds to the minute index, the output_arrays is an array that contains the number of FPS * 60 number of arrays (in the code example above, 10 frames per second(fps) * 60 seconds = 600 frames = 600 arrays), and the count_arrays is an array that contains the number of FPS * 60 number of dictionaries (in the code example above, 10 frames per second(fps) * 60 seconds = 600 frames = 600 dictionaries) and the average_output_count is a dictionary that covers all the objects detected in all the frames contained in the last minute.
+**Results for the Minute function**
+The above set of **4 parameters** that are returned for every second of the video processed are the same parameters that will be returned for every minute of the video processed. The difference is that the index returned corresponds to the minute index, the **output_arrays** is an array that contains the number of FPS * 60 number of arrays (in the code example above, 10 frames per second(fps) * 60 seconds = 600 frames = 600 arrays), and the **count_arrays** is an array that contains the number of FPS * 60 number of dictionaries (in the code example above, 10 frames per second(fps) * 60 seconds = 600 frames = 600 dictionaries) and the **average_output_count** is a dictionary that covers all the objects detected in all the frames contained in the last minute.
-
- ***Results for the Video Complete Function
-ImageAI allows you to obtain complete analysis of the entire video processed. All you need is to define a function like the forSecond or forMinute function and set the video_complete_function parameter into your .detectObjectsFromVideo() or .detectCustomObjectsFromVideo() function. The same values for the per_second-function and per_minute_function will be returned. The difference is that no index will be returned and the other 3 values will be returned, and the 3 values will cover all frames in the video. Below is a sample function:
-
+**Results for the Video Complete Function**
+**ImageAI** allows you to obtain complete analysis of the entire video processed. All you need is to define a function like the forSecond or forMinute function and set the **video_complete_function** parameter in your `.detectObjectsFromVideo()` or `.detectCustomObjectsFromVideo()` function. The same values for the per_second_function and per_minute_function will be returned. The difference is that no index will be returned and the other 3 values will be returned, and the 3 values will cover all frames in the video. Below is a sample function:
+
+```python
def forFull(output_arrays, count_arrays, average_output_count):
#Perform action on the 3 parameters returned into the function
-video_detector.detectObjectsFromVideo(input_file_path=os.path.join(execution_path, "traffic.mp4"), output_file_path=os.path.join(execution_path, "traffic_detected") , frames_per_second=10, video_complete_function=forFull, minimum_percentage_probability=30)
+video_detector.detectObjectsFromVideo(
+ input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_detected"),
+ frames_per_second=10,
+ video_complete_function=forFull,
+ minimum_percentage_probability=30
+)
+```
+
+**FINAL NOTE ON VIDEO ANALYSIS** : **ImageAI** allows you to obtain the detected video frame as a Numpy array at each frame, second and minute function. All you need to do is specify one more parameter in your function and set `return_detected_frame=True` in your `detectObjectsFromVideo()` or `detectCustomObjectsFromVideo()` function. Once this is set, the extra parameter you specified in your function will be the Numpy array of the detected frame. See a sample below:
-
-
-FINAL NOTE ON VIDEO ANALYSIS : ImageAI allows you to obtain the detected video frame as a Numpy array at each frame, second and minute function. All you need to do is specify one more parameter in your function and set return_detected_frame=True in your detectObjectsFromVideo() or detectCustomObjectsFrom() function. Once this is set, the extra parameter you sepecified in your function will be the Numpy array of the detected frame. See a sample below:
-
+```python
def forFrame(frame_number, output_array, output_count, detected_frame):
print("FOR FRAME " , frame_number)
print("Output for each object : ", output_array)
@@ -289,116 +334,56 @@ def forFrame(frame_number, output_array, output_count, detected_frame):
print("Returned Objects is : ", type(detected_frame))
print("------------END OF A FRAME --------------")
-video_detector.detectObjectsFromVideo(input_file_path=os.path.join(execution_path, "traffic.mp4"), output_file_path=os.path.join(execution_path, "traffic_detected") , frames_per_second=10, per_frame_function=forFrame, minimum_percentage_probability=30, return_detected_frame=True)
-
-
-
-
-
-
Video Detection Speed
- ImageAI now provides detection speeds for all video object detection tasks. The detection speeds allow you to reduce
- the time of detection at a rate between 20% - 80%, and yet having just slight changes but accurate detection
-results. Coupled with lowering the minimum_percentage_probability parameter, detections can closely match the normal
-speed and yet reduce detection time drastically. The available detection speeds are "normal"(default), "fast", "faster" , "fastest" and "flash".
-All you need to do is to state the speed mode you desire when loading the model as seen below.
-
-
detector.loadModel(detection_speed="fast")
-
-To observe the differences in the detection speeds, look below for each speed applied to object detection with
- coupled with the adjustment of the minimum_percentage_probability , time taken to detect and detections given.
-The results below are obtained from detections performed on a NVIDIA K80 GPU. Links are provided below to download
- the videos for each detection speed applied.
+The above video object detection tasks are optimized for frame-real-time object detections that ensure that objects in every frame of the video are detected. **ImageAI** provides you the option to adjust the video frame detections which can speed up your video detection process. When calling the `.detectObjectsFromVideo()` or `.detectCustomObjectsFromVideo()`, you can specify at which frame interval detections should be made. By setting the **frame_detection_interval** parameter to be equal to 5 or 20, that means the object detections in the video will be updated after 5 frames or 20 frames.
+If your output video **frames_per_second** is set to 20, that means the object detections in the video will be updated once in every quarter of a second or every second. This is useful in case scenarios where the available compute is less powerful and speeds of moving objects are low. This ensures you can have objects detected as second-real-time, half-a-second-real-time or whichever way suits your needs. We conducted video object detection on the same input video we have been using all this while by applying a **frame_detection_interval** value equal to 5.
-
-Video Length = 1min 24seconds, Detection Speed = "fast" , Minimum Percentage Probability = 40, Detection Time = 11min 6seconds
+**ImageAI** now allows you to set a timeout in seconds for detection of objects in videos or camera live feed.
+To set a timeout for your video detection code, all you need to do is specify the `detection_timeout` parameter in the `detectObjectsFromVideo()` function to the number of desired seconds. In the example code below, we set `detection_timeout` to 120 seconds (2 minutes).
-Video Length = 1min 24seconds, Detection Speed = "fastest" , Minimum Percentage Probability = 20, Detection Time = 6min 20seconds
-
-
+```python
+from imageai.Detection import VideoObjectDetection
+import os
+import cv2
-Video Length = 1min 24seconds, Detection Speed = "flash" , Minimum Percentage Probability = 10, Detection Time = 3min 55seconds
-
-
-If you use more powerful NVIDIA GPUs, you will definitely have faster detection time than stated above.
+execution_path = os.getcwd()
+camera = cv2.VideoCapture(0)
-
-The above video objects detection task are optimized for frame-real-time object detections that ensures that objects in every frame
-of the video is detected. ImageAI provides you the option to adjust the video frame detections which can speed up
-your video detection process. When calling the .detectObjectsFromVideo() or .detectCustomObjectsFromVideo(), you can
-specify at which frame interval detections should be made. By setting the frame_detection_interval parameter to be
- equal to 5 or 20, that means the object detections in the video will be updated after 5 frames or 20 frames.
-If your output video frames_per_second is set to 20, that means the object detections in the video will
- be updated once in every quarter of a second or every second. This is useful in case scenarious where the available
- compute is less powerful and speeds of moving objects are low. This ensures you can have objects detected as second-real-time
-, half-a-second-real-time or whichever way suits your needs. We conducted video object detection on the same input
- video we have been using all this while by applying a frame_detection_interval value equal to 5.
-The results below are obtained from detections performed on a NVIDIA K80 GPU.
-See the results and link to download the videos below:
-
-
-
-
- ImageAI provides 4 different algorithms and model types to perform custom image prediction using your custom models.
-You will be able to use your model trained with ImageAI and the corresponding model_class JSON file to predict custom objects
-that you have trained the model on. In this example, we will be using the model trained for 20 experiments on IdenProf, a dataset
- of uniformed professionals and achieved 65.17% accuracy on the test dataset (You can use your own trained model and generated JSON file. This 'class' is provided mainly for the purpose to use your own custom models.). Download the ResNet model of the model and JSON files in links below:
- - ResNet (Size = 90.4 mb)
- - IdenProf model_class.json file
- Great! Once you have downloaded this model file and the JSON file, start a new python project, and then copy the model file
-and the JSON file to your project folder where your python files (.py files) will be . Download the image below, or take any image on your computer
- that include any of the following professionals(Chef, Doctor, Engineer, Farmer, Fireman, Judge, Mechanic, Pilot, Police and Waiter)
-and copy it to your python project's folder. Then create a python file and give it a name; an example is FirstCustomPrediction.py.
- Then write the code below into the python file:
- In the lines above, we created and instance of the CustomImagePrediction()
- class in the first line, then we set the model type of the prediction object to ResNet by caling the .setModelTypeAsResNet()
- in the second line, we set the model path of the prediction object to the path of the custom model file (resnet_model_ex-020_acc-0.651714.h5) we copied to the python file folder
- in the third line, we set the path to the model_class.json of the model, we load the model and parse the number of objected that can be predicted in the model.
-
-
In the above line, we defined 2 variables to be equal to the function
- called to predict an image, which is the .predictImage() function, into which we parsed the path to
- our image and also state the number of prediction results we want to have (values from 1 to 10 in this case) parsing
- result_count=5 . The .predictImage() function will return 2 array objects with the first (predictions) being
- an array of predictions and the second (percentage_probabilities) being an array of the corresponding percentage probability for each
- prediction.
-
-
for eachPrediction, eachProbability in zip(predictions, probabilities):
- print(eachPrediction + " : " + eachProbability)
The above line obtains each object in the predictions array, and also
-obtains the corresponding percentage probability from the percentage_probabilities, and finally prints
-the result of both to console.
-
-
-
-
-
-
-CustomImagePrediction class also supports the multiple predictions, input types and prediction speeds that are contained
-in the ImagePrediction class. Follow this link to see all the details.
-
-
-
>> Documentation
-We have provided full documentation for all ImageAI classes and functions in 2 major languages. Find links below:
-
- >> Documentation - English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
- >> Documentation - Chinese Version [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)
-
diff --git a/imageai/Prediction/CUSTOMTRAINING.md b/imageai/Prediction/CUSTOMTRAINING.md
deleted file mode 100644
index e9d44419..00000000
--- a/imageai/Prediction/CUSTOMTRAINING.md
+++ /dev/null
@@ -1,613 +0,0 @@
-# ImageAI : Custom Prediction Model Training
-
-
-ImageAI provides the most simple and powerful approach to training custom image prediction models
-using state-of-the-art SqueezeNet, ResNet50, InceptionV3 and DenseNet
-which you can load into the imageai.Prediction.Custom.CustomImagePrediction class. This allows
- you to train your own model on any set of images that corresponds to any type of objects/persons.
-The training process generates a JSON file that maps the objects types in your image dataset
-and creates lots of models. You will then peak the model with the highest accuracy and perform custom
-image prediction using the model and the JSON file generated.
-
-Because model training is a compute intensive tasks, we strongly advise you perform this experiment using a computer with a NVIDIA GPU and the GPU version of Tensorflow
- installed. Performing model training on CPU will my take hours or days. With NVIDIA GPU powered computer system, this will take
- a few hours. You can use Google Colab for this experiment as it has an NVIDIA K80 GPU available.
-
-To train a custom prediction model, you need to prepare the images you want to use to train the model.
-You will prepare the images as follows:
-
-1. Create a dataset folder with the name you will like your dataset to be called (e.g pets)
-2. In the dataset folder, create a folder by the name train
-3. In the dataset folder, create a folder by the name test
-4. In the train folder, create a folder for each object you want to the model to predict and give
- the folder a name that corresponds to the respective object name (e.g dog, cat, squirrel, snake)
-5. In the test folder, create a folder for each object you want to the model to predict and give
- the folder a name that corresponds to the respective object name (e.g dog, cat, squirrel, snake)
-6. In each folder present in the train folder, put the images of each object in its respective folder.
-This images are the ones to be used to train the model
-To produce a model that can perform well in practical applications, I recommend you about 500 or more
- images per object. 1000 images per object is just great
-7. In each folder present in the test folder, put about 100 to 200 images of each object in its respective folder.
-These images are the ones to be used to test the model as it trains
-8. Once you have done this, the structure of your image dataset folder should look like below:
- Yes! Just 5 lines of code and you can train any of the available 4 state-of-the-art Deep Learning algorithms on your custom dataset.
-Now lets take a look at how the code above works.
-
from imageai.Prediction.Custom import ModelTraining
-
-model_trainer = ModelTraining()
-model_trainer.setModelTypeAsResNet()
-model_trainer.setDataDirectory("pets")
-
-In the first line, we import the ImageAI model training class, then we define the model trainer in the second line,
- we set the network type in the third line and set the path to the image dataset we want to train the network on.
-
-
-
-
-In the code above, we start the training process. The parameters stated in the function are as below:
-- num_objects : this is to state the number of object types in the image dataset
-- num_experiments : this is to state the number of times the network will train over all the training images,
- which is also called epochs
-- enhance_data (optional) : This is used to state if we want the network to produce modified copies of the training
-images for better performance.
-- batch_size : This is to state the number of images the network will process at ones. The images
- are processed in batches until they are exhausted per each experiment performed.
-- show_network_summary : This is to state if the network should show the structure of the training
- network in the console.
-
-
-When you start the training, you should see something like this in the console:
-
-
-
-Let us explain the details shown above:
-1. The line Epoch 1/100 means the network is training the first experiment of the targeted 100
-2. The line 1/25 [>.............................] - ETA: 52s - loss: 2.3026 - acc: 0.2500
- represents the number of batches that has been trained in the present experiment
-3. The line Epoch 00000: saving model to C:\Users\User\PycharmProjects\ImageAITest\pets\models\model_ex-000_acc-0.100000.h5
- refers to the model saved after the present experiment. The ex_000 represents the experiment at this stage
- while the acc_0.100000 and val_acc: 0.1000 represents the accuracy of the model on the test images after the present experiment (maximum value value
- of accuracy is 1.0). This result helps to know the best performed model you can use for custom image prediction.
- Once you are done training your custom model, you can use the "CustomImagePrediction" class to perform image prediction with your model. Simply follow the link below.
A sample from the IdenProf Dataset used to train a Model for predicting professionals.
-
-
-Below we provide a sample code to train on IdenProf, a dataset which contains images of 10
- uniformed professionals. The code below will download the dataset and initiate the training:
-
-We are providing an opportunity for anyone that uses to train a model to submit the model and its JSON mapping file
- and have it listed in this repository. Reach to the details below should intend to share your trained model in this repository.
-▣ First Prediction
-▣ Prediction Speed
-▣ Image Input Types
-▣ Multiple Images Prediction
-▣ Prediction in MultiThreading
-▣ Documentation
-
- ImageAI provides 4 different algorithms and model types to perform image prediction.
-To perform image prediction on any picture, take the following simple steps. The 4 algorithms provided for
- image prediction include SqueezeNet, ResNet, InceptionV3 and DenseNet. Each of these
- algorithms have individual model files which you must use depending on the choice of your algorithm. To download the
- model file for your choice of algorithm, click on any of the links below:
- - SqueezeNet (Size = 4.82 mb, fastest prediction time and moderate accuracy)
- - ResNet50 by Microsoft Research (Size = 98 mb, fast prediction time and high accuracy)
- - InceptionV3 by Google Brain team (Size = 91.6 mb, slow prediction time and higher accuracy)
- - DenseNet121 by Facebook AI Research (Size = 31.6 mb, slower prediction time and highest accuracy)
- Great! Once you have downloaded this model file, start a new python project, and then copy the model file to your project
- folder where your python files (.py files) will be . Download the image below, or take any image on your computer
- and copy it to your python project's folder. Then create a python file and give it a name; an example is FirstPrediction.py.
- Then write the code below into the python file:
- In the lines above, we created and instance of the ImagePrediction()
- class in the first line, then we set the model type of the prediction object to ResNet by caling the .setModelTypeAsResNet()
- in the second line and then we set the model path of the prediction object to the path of the model file (resnet50_weights_tf_dim_ordering_tf_kernels.h5) we copied to the python file folder
- in the third line.
-
-
In the above line, we defined 2 variables to be equal to the function
- called to predict an image, which is the .predictImage() function, into which we parsed the path to
- our image and also state the number of prediction results we want to have (values from 1 to 1000) parsing
- result_count=5 . The .predictImage() function will return 2 array objects with the first (predictions) being
- an array of predictions and the second (percentage_probabilities) being an array of the corresponding percentage probability for each
- prediction.
-
-
for eachPrediction, eachProbability in zip(predictions, probabilities):
- print(eachPrediction, " : " , eachProbability)
The above line obtains each object in the predictions array, and also
-obtains the corresponding percentage probability from the percentage_probabilities, and finally prints
-the result of both to console.
-
-
-
-
-
-
-
-
>> Multiple Images Prediction
- You can run image prediction on more than one image using a single function, which is the .predictMultipleImages()
- function. It works by doing the following:
- - Define your normal ImagePrediction instance
- - Set the model type and model path
- - Call the .loadModel() function
- - Create an array and add all the string path to each of the images you want to predict to the array.
- - You then perform prediction by calling the .predictMultipleImages() function and parse in the array of images, and also set the number
- predictions you want per image by parsing result_count_per_image=5 (default value is 2)
-
- Find the sample code below:
-
-from imageai.Prediction import ImagePrediction
-import os
-
-execution_path = os.getcwd()
-
-multiple_prediction = ImagePrediction()
-multiple_prediction.setModelTypeAsResNet()
-multiple_prediction.setModelPath(os.path.join(execution_path, "resnet50_weights_tf_dim_ordering_tf_kernels.h5"))
-multiple_prediction.loadModel()
-
-all_images_array = []
-
-all_files = os.listdir(execution_path)
-for each_file in all_files:
- if(each_file.endswith(".jpg") or each_file.endswith(".png")):
- all_images_array.append(each_file)
-
-results_array = multiple_prediction.predictMultipleImages(all_images_array, result_count_per_image=5)
-
-for each_result in results_array:
- predictions, percentage_probabilities = each_result["predictions"], each_result["percentage_probabilities"]
- for index in range(len(predictions)):
- print(predictions[index] , " : " , percentage_probabilities[index])
- print("-----------------------")
- In the above code, the .predictMultipleImages() function will return an array which contains a dictionary per image.
- Each dictionary contains the arrays for predictions and percentage probability for each prediction.
-
- ImageAI now provides prediction speeds for all image prediction tasks. The prediction speeds allow you to reduce
- the time of prediction at a rate between 20% - 60%, and yet having just slight changes but accurate prediction
- results. The available prediction speeds are "normal"(default), "fast", "faster" and "fastest".
-All you need to do is to state the speed mode you desire when loading the model as seen below.
-
-
prediction.loadModel(prediction_speed="fast")
-
-To observe the differences in the prediction speeds, look below for each speed applied to multiple prediction with
-time taken to predict and predictions given. The results below are obtained from predictions performed
- on a Windows 8 laptop with Intel Celeron N2820 CPU, with processor speed of 2.13GHz
When adjusting speed modes, it is best to use models that have higher accuracies
- like the DenseNet or InceptionV3 models, or use it in case scenarios where the images predicted are iconic.
-
-
-
-
-
-
>> Image Input Types
-Previous version of ImageAI supported only file inputs and accepts file paths to an image for image prediction.
-Now, ImageAI supports 3 input types which are file path to image file(default), numpy array of image and image file stream.
-This means you can now perform image prediction in production applications such as on a web server and system
- that returns file in any of the above stated formats.
- To perform image prediction with numpy array or file stream input, you just need to state the input type
-in the .predictImage() function or the .predictMultipleImages() function. See example below.
-
-
predictions, probabilities = prediction.predictImage(image_array, result_count=5 , input_type="array" ) # For numpy array input type
-predictions, probabilities = prediction.predictImage(image_stream, result_count=5 , input_type="stream" ) # For file stream input type
-
-
-
-
-
-
>> Prediction in MultiThreading
When developing programs that run heavy task on the deafult thread like User Interfaces (UI),
- you should consider running your predictions in a new thread. When running image prediction using ImageAI in
- a new thread, you must take note the following:
- - You can create your prediction object, set its model type, set model path and json path
-outside the new thread.
- - The .loadModel() must be in the new thread and image prediction (predictImage()) must take place in th new thread.
-
- Take a look of a sample code below on image prediction using multithreading:
-
-We have provided full documentation for all ImageAI classes and functions in 2 major languages. Find links below:
-
- >> Documentation - English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
- >> Documentation - Chinese Version [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)
-
diff --git a/imageai/Prediction/ResNet/__pycache__/__init__.cpython-35.pyc b/imageai/Prediction/ResNet/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 98d25f92..00000000
Binary files a/imageai/Prediction/ResNet/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/imageai/Prediction/ResNet/__pycache__/resnet50.cpython-35.pyc b/imageai/Prediction/ResNet/__pycache__/resnet50.cpython-35.pyc
deleted file mode 100644
index e1f007a3..00000000
Binary files a/imageai/Prediction/ResNet/__pycache__/resnet50.cpython-35.pyc and /dev/null differ
diff --git a/imageai/Prediction/ResNet/resnet50.py b/imageai/Prediction/ResNet/resnet50.py
deleted file mode 100644
index 834b86f2..00000000
--- a/imageai/Prediction/ResNet/resnet50.py
+++ /dev/null
@@ -1,122 +0,0 @@
-from tensorflow.python import keras
-from tensorflow.python.keras import layers
-from tensorflow.python.keras.layers import Dense, Activation, Flatten, Conv2D, MaxPool2D, AvgPool2D, GlobalMaxPool2D, GlobalAvgPool2D, BatchNormalization, add, Input
-from tensorflow.python.keras.models import Model
-
-
-def resnet_module(input, channel_depth, strided_pool=False ):
- residual_input = input
- stride = 1
-
- if(strided_pool):
- stride = 2
- residual_input = Conv2D(channel_depth, kernel_size=1, strides=stride, padding="same")(residual_input)
- residual_input = BatchNormalization()(residual_input)
-
- input = Conv2D(int(channel_depth/4), kernel_size=1, strides=stride, padding="same")(input)
- input = BatchNormalization()(input)
- input = Activation("relu")(input)
-
- input = Conv2D(int(channel_depth / 4), kernel_size=3, strides=1, padding="same")(input)
- input = BatchNormalization()(input)
- input = Activation("relu")(input)
-
- input = Conv2D(channel_depth, kernel_size=1, strides=1, padding="same")(input)
- input = BatchNormalization()(input)
-
- input = add([input, residual_input])
- input = Activation("relu")(input)
-
- return input
-
-
-
-def resnet_first_block_first_module(input, channel_depth):
- residual_input = input
- stride = 1
-
- residual_input = Conv2D(channel_depth, kernel_size=1, strides=1, padding="same")(residual_input)
- residual_input = BatchNormalization()(residual_input)
-
- input = Conv2D(int(channel_depth/4), kernel_size=1, strides=stride, padding="same")(input)
- input = BatchNormalization()(input)
- input = Activation("relu")(input)
-
- input = Conv2D(int(channel_depth / 4), kernel_size=3, strides=stride, padding="same")(input)
- input = BatchNormalization()(input)
- input = Activation("relu")(input)
-
- input = Conv2D(channel_depth, kernel_size=1, strides=stride, padding="same")(input)
- input = BatchNormalization()(input)
-
- input = add([input, residual_input])
- input = Activation("relu")(input)
-
- return input
-
-
-def resnet_block(input, channel_depth, num_layers, strided_pool_first = False ):
- for i in range(num_layers):
- pool = False
- if(i == 0 and strided_pool_first):
- pool = True
- input = resnet_module(input, channel_depth, strided_pool=pool)
-
- return input
-
-def ResNet50(include_top=True, non_top_pooling=None, model_input=None, num_classes=1000, weights='imagenet', model_path=""):
- layers = [3,4,6,3]
- channel_depths = [256, 512, 1024, 2048]
-
- input_object = model_input
-
-
- output = Conv2D(64, kernel_size=7, strides=2, padding="same")(input_object)
- output = BatchNormalization()(output)
- output = Activation("relu")(output)
-
- output = MaxPool2D(pool_size=(3,3), strides=(2,2))(output)
- output = resnet_first_block_first_module(output, channel_depths[0])
-
-
- for i in range(4):
- channel_depth = channel_depths[i]
- num_layers = layers[i]
-
- strided_pool_first = True
- if(i == 0):
- strided_pool_first = False
- num_layers = num_layers - 1
- output = resnet_block(output, channel_depth=channel_depth, num_layers=num_layers, strided_pool_first=strided_pool_first)
-
- if(include_top):
- output = GlobalAvgPool2D(name="global_avg_pooling")(output)
- output = Dense(num_classes)(output)
- output = Activation("softmax")(output)
- else:
- if (non_top_pooling == "Average"):
- output = GlobalAvgPool2D()(output)
- elif (non_top_pooling == "Maximum"):
- output = GlobalMaxPool2D()(output)
- elif (non_top_pooling == None):
- pass
-
- model = Model(inputs=input_object, outputs=output)
-
- if(weights == "imagenet"):
- weights_path = model_path
- model.load_weights(weights_path)
- elif (weights == "trained"):
- weights_path = model_path
- model.load_weights(weights_path)
-
- return model
-
-
-
-
-
-
-
-
-
diff --git a/imageai/Prediction/SqueezeNet/__pycache__/__init__.cpython-35.pyc b/imageai/Prediction/SqueezeNet/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 9100960b..00000000
Binary files a/imageai/Prediction/SqueezeNet/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/imageai/Prediction/SqueezeNet/__pycache__/squeezenet.cpython-35.pyc b/imageai/Prediction/SqueezeNet/__pycache__/squeezenet.cpython-35.pyc
deleted file mode 100644
index eb332f5f..00000000
Binary files a/imageai/Prediction/SqueezeNet/__pycache__/squeezenet.cpython-35.pyc and /dev/null differ
diff --git a/imageai/Prediction/SqueezeNet/squeezenet.py b/imageai/Prediction/SqueezeNet/squeezenet.py
deleted file mode 100644
index 4a944b20..00000000
--- a/imageai/Prediction/SqueezeNet/squeezenet.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from tensorflow.python.keras.layers import Input, Conv2D, MaxPool2D, Activation, concatenate, Dropout
-from tensorflow.python.keras.layers import GlobalAvgPool2D, GlobalMaxPool2D
-from tensorflow.python.keras.models import Model
-
-
-def squeezenet_fire_module(input, input_channel_small=16, input_channel_large=64):
-
- channel_axis = 3
-
- input = Conv2D(input_channel_small, (1,1), padding="valid" )(input)
- input = Activation("relu")(input)
-
- input_branch_1 = Conv2D(input_channel_large, (1,1), padding="valid" )(input)
- input_branch_1 = Activation("relu")(input_branch_1)
-
- input_branch_2 = Conv2D(input_channel_large, (3, 3), padding="same")(input)
- input_branch_2 = Activation("relu")(input_branch_2)
-
- input = concatenate([input_branch_1, input_branch_2], axis=channel_axis)
-
- return input
-
-def SqueezeNet(include_top = True, weights="imagenet", model_input=None, non_top_pooling=None,
- num_classes=1000, model_path = ""):
-
- if(weights == "imagenet" and num_classes != 1000):
- raise ValueError("You must parse in SqueezeNet model trained on the 1000 class ImageNet")
-
-
-
-
- image_input = model_input
-
-
- network = Conv2D(64, (3,3), strides=(2,2), padding="valid")(image_input)
- network = Activation("relu")(network)
- network = MaxPool2D( pool_size=(3,3) , strides=(2,2))(network)
-
- network = squeezenet_fire_module(input=network, input_channel_small=16, input_channel_large=64)
- network = squeezenet_fire_module(input=network, input_channel_small=16, input_channel_large=64)
- network = MaxPool2D(pool_size=(3,3), strides=(2,2))(network)
-
- network = squeezenet_fire_module(input=network, input_channel_small=32, input_channel_large=128)
- network = squeezenet_fire_module(input=network, input_channel_small=32, input_channel_large=128)
- network = MaxPool2D(pool_size=(3, 3), strides=(2, 2))(network)
-
- network = squeezenet_fire_module(input=network, input_channel_small=48, input_channel_large=192)
- network = squeezenet_fire_module(input=network, input_channel_small=48, input_channel_large=192)
- network = squeezenet_fire_module(input=network, input_channel_small=64, input_channel_large=256)
- network = squeezenet_fire_module(input=network, input_channel_small=64, input_channel_large=256)
-
- if(include_top):
- network = Dropout(0.5)(network)
-
- network = Conv2D(num_classes, kernel_size=(1,1), padding="valid", name="last_conv")(network)
- network = Activation("relu")(network)
-
- network = GlobalAvgPool2D()(network)
- network = Activation("softmax")(network)
-
- else:
- if(non_top_pooling == "Average"):
- network = GlobalAvgPool2D()(network)
- elif(non_top_pooling == "Maximum"):
- network = GlobalMaxPool2D()(network)
- elif(non_top_pooling == None):
- pass
-
- input_image = image_input
- model = Model(inputs=input_image, outputs=network)
-
- if(weights =="imagenet"):
- weights_path = model_path
- model.load_weights(weights_path)
- elif(weights =="trained"):
- weights_path = model_path
- model.load_weights(weights_path)
-
- return model
-
-
-
-
-
-
-
diff --git a/imageai/Prediction/Thumbs.db b/imageai/Prediction/Thumbs.db
deleted file mode 100644
index 204a95e4..00000000
Binary files a/imageai/Prediction/Thumbs.db and /dev/null differ
diff --git a/imageai/Prediction/__init__.py b/imageai/Prediction/__init__.py
deleted file mode 100644
index fa3db385..00000000
--- a/imageai/Prediction/__init__.py
+++ /dev/null
@@ -1,647 +0,0 @@
-import numpy as np
-from tensorflow.python.keras.preprocessing import image
-from PIL import Image
-
-
-from tensorflow.python.keras.layers import Input, Conv2D, MaxPool2D, Activation, concatenate, Dropout
-from tensorflow.python.keras.layers import GlobalAvgPool2D, GlobalMaxPool2D
-from tensorflow.python.keras.models import Model
-from tensorflow.python.keras.models import Sequential
-
-
-class ImagePrediction:
- """
- This is the image prediction class in the ImageAI library. It provides support for 4 different models which are:
- ResNet, SqueezeNet, DenseNet and Inception V3. After instantiating this class, you can set it's properties and
- make image predictions using it's pre-defined functions.
-
- The following functions are required to be called before a prediction can be made
- * setModelPath()
- * At least of of the following and it must correspond to the model set in the setModelPath()
- [setModelTypeAsSqueezeNet(), setModelTypeAsResNet(), setModelTypeAsDenseNet, setModelTypeAsInceptionV3]
- * loadModel() [This must be called once only before making a prediction]
-
- Once the above functions have been called, you can call the predictImage() function of the prediction instance
- object at anytime to predict an image.
- """
-
- def __init__(self):
- self.__modelType = ""
- self.modelPath = ""
- self.__modelLoaded = False
- self.__model_collection = []
- self.__input_image_size = 224
-
-
- def setModelPath(self, model_path):
- """
- 'setModelPath()' function is required and is used to set the file path to the model adopted from the list of the
- available 4 model types. The model path must correspond to the model type set for the prediction instance object.
-
- :param model_path:
- :return:
- """
- self.modelPath = model_path
-
-
- def setModelTypeAsSqueezeNet(self):
- """
- 'setModelTypeAsSqueezeNet()' is used to set the model type to the SqueezeNet model
- for the prediction instance object .
- :return:
- """
- self.__modelType = "squeezenet"
-
- def setModelTypeAsResNet(self):
- """
- 'setModelTypeAsResNet()' is used to set the model type to the ResNet model
- for the prediction instance object .
- :return:
- """
- self.__modelType = "resnet"
-
- def setModelTypeAsDenseNet(self):
- """
- 'setModelTypeAsDenseNet()' is used to set the model type to the DenseNet model
- for the prediction instance object .
- :return:
- """
- self.__modelType = "densenet"
-
- def setModelTypeAsInceptionV3(self):
- """
- 'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model
- for the prediction instance object .
- :return:
- """
- self.__modelType = "inceptionv3"
-
- def loadModel(self, prediction_speed="normal"):
- """
- 'loadModel()' function is used to load the model structure into the program from the file path defined
- in the setModelPath() function. This function receives an optional value which is "prediction_speed".
- The value is used to reduce the time it takes to predict an image, down to about 50% of the normal time,
- with just slight changes or drop in prediction accuracy, depending on the nature of the image.
- * prediction_speed (optional); Acceptable values are "normal", "fast", "faster" and "fastest"
-
- :param prediction_speed :
- :return:
- """
-
- if(prediction_speed=="normal"):
- self.__input_image_size = 224
- elif(prediction_speed=="fast"):
- self.__input_image_size = 160
- elif(prediction_speed=="faster"):
- self.__input_image_size = 120
- elif (prediction_speed == "fastest"):
- self.__input_image_size = 100
-
- if (self.__modelLoaded == False):
-
- image_input = Input(shape=(self.__input_image_size, self.__input_image_size, 3))
-
- if(self.__modelType == "" ):
- raise ValueError("You must set a valid model type before loading the model.")
-
-
- elif(self.__modelType == "squeezenet"):
- import numpy as np
- from tensorflow.python.keras.preprocessing import image
- from .SqueezeNet.squeezenet import SqueezeNet
- from .imagenet_utils import preprocess_input, decode_predictions
- try:
- model = SqueezeNet(model_path=self.modelPath, model_input=image_input)
- self.__model_collection.append(model)
- self.__modelLoaded = True
- except:
- raise ("You have specified an incorrect path to the SqueezeNet model file.")
- elif(self.__modelType == "resnet"):
- import numpy as np
- from tensorflow.python.keras.preprocessing import image
- from .ResNet.resnet50 import ResNet50
- from .imagenet_utils import preprocess_input, decode_predictions
- try:
- model = ResNet50(model_path=self.modelPath, model_input=image_input)
- self.__model_collection.append(model)
- self.__modelLoaded = True
- except:
- raise ValueError("You have specified an incorrect path to the ResNet model file.")
-
- elif (self.__modelType == "densenet"):
- from tensorflow.python.keras.preprocessing import image
- from .DenseNet.densenet import DenseNetImageNet121, preprocess_input, decode_predictions
- import numpy as np
- try:
- model = DenseNetImageNet121(model_path=self.modelPath, model_input=image_input)
- self.__model_collection.append(model)
- self.__modelLoaded = True
- except:
- raise ValueError("You have specified an incorrect path to the DenseNet model file.")
-
- elif (self.__modelType == "inceptionv3"):
- import numpy as np
- from tensorflow.python.keras.preprocessing import image
-
- from imageai.Prediction.InceptionV3.inceptionv3 import InceptionV3
- from imageai.Prediction.InceptionV3.inceptionv3 import preprocess_input, decode_predictions
-
- try:
- model = InceptionV3(include_top=True, weights="imagenet", model_path=self.modelPath, model_input=image_input)
- self.__model_collection.append(model)
- self.__modelLoaded = True
- except:
- raise ValueError("You have specified an incorrect path to the InceptionV3 model file.")
-
-
-
-
-
-
-
-
- def predictImage(self, image_input, result_count=5, input_type="file" ):
- """
- 'predictImage()' function is used to predict a given image by receiving the following arguments:
- * input_type (optional) , the type of input to be parsed. Acceptable values are "file", "array" and "stream"
- * image_input , file path/numpy array/image file stream of the image.
- * result_count (optional) , the number of predictions to be sent which must be whole numbers between
- 1 and 1000. The default is 5.
-
- This function returns 2 arrays namely 'prediction_results' and 'prediction_probabilities'. The 'prediction_results'
- contains possible objects classes arranged in descending of their percentage probabilities. The 'prediction_probabilities'
- contains the percentage probability of each object class. The position of each object class in the 'prediction_results'
- array corresponds with the positions of the percentage possibilities in the 'prediction_probabilities' array.
-
-
- :param input_type:
- :param image_input:
- :param result_count:
- :return prediction_results, prediction_probabilities:
- """
- prediction_results = []
- prediction_probabilities = []
- if (self.__modelLoaded == False):
- raise ValueError("You must call the loadModel() function before making predictions.")
-
- else:
-
- if (self.__modelType == "squeezenet"):
-
- from .imagenet_utils import preprocess_input, decode_predictions
- if (input_type == "file"):
- try:
- image_to_predict = image.load_img(image_input, target_size=(self.__input_image_size, self.__input_image_size))
- image_to_predict = image.img_to_array(image_to_predict, data_format="channels_last")
- image_to_predict = np.expand_dims(image_to_predict, axis=0)
-
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have set a path to an invalid image file.")
- elif (input_type == "array"):
- try:
- image_input = Image.fromarray(np.uint8(image_input))
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong numpy array for the image")
- elif (input_type == "stream"):
- try:
- image_input = Image.open(image_input)
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong stream for the image")
-
- model = self.__model_collection[0]
-
- prediction = model.predict(image_to_predict, steps=1)
-
- try:
- predictiondata = decode_predictions(prediction, top=int(result_count))
-
- for results in predictiondata:
- countdown = 0
- for result in results:
- countdown += 1
- prediction_results.append(str(result[1]))
- prediction_probabilities.append(result[2] * 100)
- except:
- raise ValueError("An error occured! Try again.")
-
- return prediction_results, prediction_probabilities
- elif (self.__modelType == "resnet"):
-
- model = self.__model_collection[0]
-
- from .imagenet_utils import preprocess_input, decode_predictions
- if (input_type == "file"):
- try:
- image_to_predict = image.load_img(image_input, target_size=(self.__input_image_size, self.__input_image_size))
- image_to_predict = image.img_to_array(image_to_predict, data_format="channels_last")
- image_to_predict = np.expand_dims(image_to_predict, axis=0)
-
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have set a path to an invalid image file.")
- elif (input_type == "array"):
- try:
- image_input = Image.fromarray(np.uint8(image_input))
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong numpy array for the image")
- elif (input_type == "stream"):
- try:
- image_input = Image.open(image_input)
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong stream for the image")
-
- prediction = model.predict(x=image_to_predict, steps=1)
-
- try:
- predictiondata = decode_predictions(prediction, top=int(result_count))
-
- for results in predictiondata:
- countdown = 0
- for result in results:
- countdown += 1
- prediction_results.append(str(result[1]))
- prediction_probabilities.append(result[2] * 100)
- except:
- raise ValueError("An error occured! Try again.")
-
- return prediction_results, prediction_probabilities
- elif (self.__modelType == "densenet"):
-
- model = self.__model_collection[0]
-
- from .DenseNet.densenet import preprocess_input, decode_predictions
- from .DenseNet.densenet import DenseNetImageNet121
- if (input_type == "file"):
- try:
- image_to_predict = image.load_img(image_input, target_size=(self.__input_image_size, self.__input_image_size))
- image_to_predict = image.img_to_array(image_to_predict, data_format="channels_last")
- image_to_predict = np.expand_dims(image_to_predict, axis=0)
-
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have set a path to an invalid image file.")
- elif (input_type == "array"):
- try:
- image_input = Image.fromarray(np.uint8(image_input))
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong numpy array for the image")
- elif (input_type == "stream"):
- try:
- image_input = Image.open(image_input)
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong stream for the image")
-
- prediction = model.predict(x=image_to_predict, steps=1)
-
- try:
- predictiondata = decode_predictions(prediction, top=int(result_count))
-
- for results in predictiondata:
- countdown = 0
- for result in results:
- countdown += 1
- prediction_results.append(str(result[1]))
- prediction_probabilities.append(result[2] * 100)
- except:
- raise ValueError("An error occured! Try again.")
-
- return prediction_results, prediction_probabilities
- elif (self.__modelType == "inceptionv3"):
-
- model = self.__model_collection[0]
-
- from imageai.Prediction.InceptionV3.inceptionv3 import InceptionV3, preprocess_input, decode_predictions
-
- if (input_type == "file"):
- try:
- image_to_predict = image.load_img(image_input, target_size=(self.__input_image_size, self.__input_image_size))
- image_to_predict = image.img_to_array(image_to_predict, data_format="channels_last")
- image_to_predict = np.expand_dims(image_to_predict, axis=0)
-
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have set a path to an invalid image file.")
- elif (input_type == "array"):
- try:
- image_input = Image.fromarray(np.uint8(image_input))
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong numpy array for the image")
- elif (input_type == "stream"):
- try:
- image_input = Image.open(image_input)
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong stream for the image")
-
- prediction = model.predict(x=image_to_predict, steps=1)
-
-
- try:
- predictiondata = decode_predictions(prediction, top=int(result_count))
-
- for results in predictiondata:
- countdown = 0
- for result in results:
- countdown += 1
- prediction_results.append(str(result[1]))
- prediction_probabilities.append(result[2] * 100)
- except:
- raise ValueError("An error occured! Try again.")
-
- return prediction_results, prediction_probabilities
-
-
-
- def predictMultipleImages(self, sent_images_array, result_count_per_image=2, input_type="file"):
- """
- 'predictMultipleImages()' function is used to predict more than one image by receiving the following arguments:
- * input_type , the type of inputs contained in the parsed array. Acceptable values are "file", "array" and "stream"
- * sent_images_array , an array of image file paths, image numpy array or image file stream
- * result_count_per_image (optionally) , the number of predictions to be sent per image, which must be whole numbers between
- 1 and 1000. The default is 2.
-
- This function returns an array of dictionaries, with each dictionary containing 2 arrays namely 'prediction_results' and 'prediction_probabilities'. The 'prediction_results'
- contains possible objects classes arranged in descending of their percentage probabilities. The 'prediction_probabilities'
- contains the percentage probability of each object class. The position of each object class in the 'prediction_results'
- array corresponds with the positions of the percentage possibilities in the 'prediction_probabilities' array.
-
-
- :param input_type:
- :param sent_images_array:
- :param result_count_per_image:
- :return output_array:
- """
-
- output_array = []
-
- for image_input in sent_images_array:
-
- prediction_results = []
- prediction_probabilities = []
- if (self.__modelLoaded == False):
- raise ValueError("You must call the loadModel() function before making predictions.")
-
- else:
-
- if (self.__modelType == "squeezenet"):
-
- from .imagenet_utils import preprocess_input, decode_predictions
- if (input_type == "file"):
- try:
- image_to_predict = image.load_img(image_input, target_size=(self.__input_image_size, self.__input_image_size))
- image_to_predict = image.img_to_array(image_to_predict, data_format="channels_last")
- image_to_predict = np.expand_dims(image_to_predict, axis=0)
-
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have set a path to an invalid image file.")
- elif (input_type == "array"):
- try:
- image_input = Image.fromarray(np.uint8(image_input))
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong numpy array for the image")
- elif (input_type == "stream"):
- try:
- image_input = Image.open(image_input)
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong stream for the image")
-
- model = self.__model_collection[0]
-
- prediction = model.predict(image_to_predict, steps=1)
-
- try:
- predictiondata = decode_predictions(prediction, top=int(result_count_per_image))
-
- for results in predictiondata:
- countdown = 0
- for result in results:
- countdown += 1
- prediction_results.append(str(result[1]))
- prediction_probabilities.append(result[2] * 100)
- except:
- raise ValueError("An error occured! Try again.")
-
- each_image_details = {}
- each_image_details["predictions"] = prediction_results
- each_image_details["percentage_probabilities"] = prediction_probabilities
- output_array.append(each_image_details)
-
- elif (self.__modelType == "resnet"):
-
- model = self.__model_collection[0]
-
- from .imagenet_utils import preprocess_input, decode_predictions
- if (input_type == "file"):
- try:
- image_to_predict = image.load_img(image_input, target_size=(self.__input_image_size, self.__input_image_size))
- image_to_predict = image.img_to_array(image_to_predict, data_format="channels_last")
- image_to_predict = np.expand_dims(image_to_predict, axis=0)
-
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have set a path to an invalid image file.")
- elif (input_type == "array"):
- try:
- image_input = Image.fromarray(np.uint8(image_input))
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong numpy array for the image")
- elif (input_type == "stream"):
- try:
- image_input = Image.open(image_input)
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong stream for the image")
-
- prediction = model.predict(x=image_to_predict, steps=1)
-
- try:
- predictiondata = decode_predictions(prediction, top=int(result_count_per_image))
-
- for results in predictiondata:
- countdown = 0
- for result in results:
- countdown += 1
- prediction_results.append(str(result[1]))
- prediction_probabilities.append(result[2] * 100)
- except:
- raise ValueError("An error occured! Try again.")
-
- each_image_details = {}
- each_image_details["predictions"] = prediction_results
- each_image_details["percentage_probabilities"] = prediction_probabilities
- output_array.append(each_image_details)
-
- elif (self.__modelType == "densenet"):
-
- model = self.__model_collection[0]
-
- from .DenseNet.densenet import preprocess_input, decode_predictions
- from .DenseNet.densenet import DenseNetImageNet121
- if (input_type == "file"):
- try:
- image_to_predict = image.load_img(image_input, target_size=(self.__input_image_size, self.__input_image_size))
- image_to_predict = image.img_to_array(image_to_predict, data_format="channels_last")
- image_to_predict = np.expand_dims(image_to_predict, axis=0)
-
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have set a path to an invalid image file.")
- elif (input_type == "array"):
- try:
- image_input = Image.fromarray(np.uint8(image_input))
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong numpy array for the image")
- elif (input_type == "stream"):
- try:
- image_input = Image.open(image_input)
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong stream for the image")
-
- prediction = model.predict(x=image_to_predict, steps=1)
-
- try:
- predictiondata = decode_predictions(prediction, top=int(result_count_per_image))
-
- for results in predictiondata:
- countdown = 0
- for result in results:
- countdown += 1
- prediction_results.append(str(result[1]))
- prediction_probabilities.append(result[2] * 100)
- except:
- raise ValueError("An error occured! Try again.")
-
- each_image_details = {}
- each_image_details["predictions"] = prediction_results
- each_image_details["percentage_probabilities"] = prediction_probabilities
- output_array.append(each_image_details)
-
- elif (self.__modelType == "inceptionv3"):
-
- model = self.__model_collection[0]
-
- from imageai.Prediction.InceptionV3.inceptionv3 import InceptionV3, preprocess_input, \
- decode_predictions
-
- if (input_type == "file"):
- try:
- image_to_predict = image.load_img(image_input, target_size=(self.__input_image_size, self.__input_image_size))
- image_to_predict = image.img_to_array(image_to_predict, data_format="channels_last")
- image_to_predict = np.expand_dims(image_to_predict, axis=0)
-
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have set a path to an invalid image file.")
- elif (input_type == "array"):
- try:
- image_input = Image.fromarray(np.uint8(image_input))
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong numpy array for the image")
- elif (input_type == "stream"):
- try:
- image_input = Image.open(image_input)
- image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
- image_input = np.expand_dims(image_input, axis=0)
- image_to_predict = image_input.copy()
- image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
- image_to_predict = preprocess_input(image_to_predict)
- except:
- raise ValueError("You have parsed in a wrong stream for the image")
-
- prediction = model.predict(x=image_to_predict, steps=1)
-
- try:
- predictiondata = decode_predictions(prediction, top=int(result_count_per_image))
-
- for results in predictiondata:
- countdown = 0
- for result in results:
- countdown += 1
- prediction_results.append(str(result[1]))
- prediction_probabilities.append(result[2] * 100)
- except:
- raise ValueError("An error occured! Try again.")
-
- each_image_details = {}
- each_image_details["predictions"] = prediction_results
- each_image_details["percentage_probabilities"] = prediction_probabilities
- output_array.append(each_image_details)
-
-
- return output_array
-
diff --git a/imageai/Prediction/__pycache__/__init__.cpython-35.pyc b/imageai/Prediction/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 4c0614c8..00000000
Binary files a/imageai/Prediction/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/imageai/Prediction/__pycache__/imagenet_utils.cpython-35.pyc b/imageai/Prediction/__pycache__/imagenet_utils.cpython-35.pyc
deleted file mode 100644
index dc80a1c9..00000000
Binary files a/imageai/Prediction/__pycache__/imagenet_utils.cpython-35.pyc and /dev/null differ
diff --git a/imageai/__init__.py b/imageai/__init__.py
index e69de29b..6d14c62c 100644
--- a/imageai/__init__.py
+++ b/imageai/__init__.py
@@ -0,0 +1 @@
+from .backend_check import backend_check
\ No newline at end of file
diff --git a/imageai/__pycache__/__init__.cpython-35.pyc b/imageai/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 3f5a195e..00000000
Binary files a/imageai/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/imageai/Detection/keras_retinanet/__init__.py b/imageai/backend_check/__init__.py
similarity index 100%
rename from imageai/Detection/keras_retinanet/__init__.py
rename to imageai/backend_check/__init__.py
diff --git a/imageai/backend_check/backend_check.py b/imageai/backend_check/backend_check.py
new file mode 100644
index 00000000..319a89b4
--- /dev/null
+++ b/imageai/backend_check/backend_check.py
@@ -0,0 +1,11 @@
+try:
+ import torch
+ import torchvision
+except:
+ try:
+ import tensorflow
+ import keras
+
+ raise RuntimeError("Dependency error!!! It appears you are trying to use ImageAI with a Tensorflow backend. ImageAI now uses PyTorch as its backend from version 3.0.2. If you want to use the Tensorflow models or a custom-trained '.h5' model, install ImageAI 2.1.6 or earlier. To use the latest Pytorch models, see the documentation in https://imageai.readthedocs.io/")
+ except:
+ raise RuntimeError("Dependency error!!! PyTorch and TorchVision are not installed. Please see installation instructions in the documentation https://imageai.readthedocs.io/")
\ No newline at end of file
diff --git a/imageai/backend_check/model_extension.py b/imageai/backend_check/model_extension.py
new file mode 100644
index 00000000..030e0b74
--- /dev/null
+++ b/imageai/backend_check/model_extension.py
@@ -0,0 +1,7 @@
+import os
+
+def extension_check(file_path: str):
+ if file_path.endswith(".h5"):
+ raise RuntimeError("You are trying to use a Tensorflow model with ImageAI. ImageAI now uses PyTorch as its backend from version 3.0.2. If you want to use the Tensorflow models or a custom-trained '.h5' model, install ImageAI 2.1.6 or earlier. To use the latest Pytorch models, see the documentation in https://imageai.readthedocs.io/")
+ elif file_path.endswith(".pt") == False and file_path.endswith(".pth") == False:
+ raise ValueError(f"Invalid model file {os.path.basename(file_path)}. Please pass in a '.pt' or '.pth' model file.")
diff --git a/imageai/densenet121/__init__.py b/imageai/densenet121/__init__.py
new file mode 100644
index 00000000..357c6978
--- /dev/null
+++ b/imageai/densenet121/__init__.py
@@ -0,0 +1,153 @@
+import os, warnings
+from pathlib import Path
+from typing import List, Tuple
+
+import torch, torchvision
+import torch.nn.functional as F
+from torchvision import transforms
+from PIL import Image
+
+warnings.filterwarnings("once", category=ResourceWarning)
+
+class DenseNet121Pretrained:
+ """
+ An implementation that allows for easy classification of images
+ using the state of the art DenseNet121 computer vision model.
+ """
+ def __init__(self, label_path : str) -> None:
+ self.__model = torchvision.models.densenet121(pretrained=False)
+ self.__classes = self.__load_classes(label_path)
+ self.__has_loaded_weights = False
+ self.__device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.__model_path = ""
+
+ def __load_classes(self, path : str) -> List[str]:
+ with open(path) as f:
+ unique_classes = [c.strip() for c in f.readlines()]
+ return unique_classes
+
+ def __load_image(self, image_path : str) -> Tuple[List[str], torch.Tensor]:
+ """
+ Loads image/images from the given path. If image_path is a directory, this
+ function only load the images in the directory (it does not visit the sub-
+ directories). This function also convert the loaded image/images to the
+ specification expected by the DenseNet121 architecture.
+ """
+ allowed_file_extensions = ["jpg", "jpeg", "png"]
+ images = []
+ fnames = []
+ preprocess = transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ ])
+ if os.path.isfile(image_path):
+ img = Image.open(image_path).convert("RGB")
+ images.append(preprocess(img))
+ fnames.append(os.path.basename(image_path))
+
+ elif os.path.isdir(image_path):
+ for file in os.listdir(image_path):
+ if os.path.isfile(os.path.join(image_path, file)) and\
+ file.rsplit('.')[-1].lower() in allowed_file_extensions:
+ img = Image.open(os.path.join(image_path, file)).convert("RGB")
+ images.append(preprocess(img))
+ fnames.append(file)
+ if images:
+ return fnames, torch.stack(images)
+ raise RuntimeError(
+ f"Error loading images from {os.path.abspath(image_path)}."
+ "\nEnsure the folder contains images,"
+ " allowed file extensions are .jpg, .jpeg, .png"
+ )
+
+ # properties
+ model_path = property(
+ fget=lambda self : self.__model_path,
+ fset=lambda self, path: self.set_model_path(path),
+ doc="Path containing the pretrained weight."
+ )
+
+ def set_model_path(self, path : str) -> None:
+ """
+ Sets the path to the pretrained weight.
+ """
+ if os.path.isfile(path):
+ self.__model_path = path
+ self.__has_loaded_weights = False
+ else:
+ raise ValueError(
+ "parameter path should be a path to the pretrained weight file."
+ )
+
+ def load_model(self) -> None:
+ """
+ Loads the DenseNet121 vision weights into the model architecture.
+ """
+ if not self.__has_loaded_weights:
+ try:
+ import re
+ state_dict = torch.load(self.__model_path, map_location=self.__device)
+ # '.'s are no longer allowed in module names, but previous densenet layers
+ # as provided by the pytorch organization has names that uses '.'s.
+ pattern = re.compile(
+ r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\."
+ "(?:weight|bias|running_mean|running_var))$"
+ )
+ for key in list(state_dict.keys()):
+ res = pattern.match(key)
+ if res:
+ new_key = res.group(1) + res.group(2)
+ state_dict[new_key] = state_dict[key]
+ del state_dict[key]
+ self.__model.load_state_dict(state_dict)
+ self.__has_loaded_weights = True
+ self.__model.eval()
+ except Exception:
+ print("Weight loading failed.\nEnsure the model path is"
+ " set and the weight file is in the specified model path.")
+
+ def classify(self, image_path : str, top_n : int = 5, verbose : bool = True) -> List[List[Tuple[str, str]]]:
+ """
+ Classifies image/images according to the classes provided by imagenet.
+
+ Parameters:
+ -----------
+ image_path: a path to a single image or a path to a directory containing
+ images. If image_path is a path to a file, this functions
+ classifies the image according to the categories provided
+ by imagenet, else, if image_path is a path to a directory
+ that contains images, this function classifies all images in
+ the given directory (it doesn't visit the subdirectories).
+
+ top_n: number of top predictions to return.
+ verbose: if true, it prints the top_n predictions.
+ """
+ if not self.__has_loaded_weights:
+ warnings.warn("Pretrained weights aren't loaded", ResourceWarning)
+
+ fnames, images = self.__load_image(image_path)
+ images = images.to(self.__device)
+
+ with torch.no_grad():
+ output = self.__model(images)
+ probabilities = torch.softmax(output, dim=1)
+ top5_prob, top5_catid = torch.topk(probabilities, top_n)
+
+ predictions = [
+ [
+ (self.__classes[top5_catid[i][j]], f"{top5_prob[i][j].item()*100:.5f}%")
+ for j in range(top5_prob.shape[1])
+ ]
+ for i in range(top5_prob.shape[0])
+ ]
+
+ if verbose:
+ for idx, pred in enumerate(predictions):
+ print("-"*50, f"Top {top_n} predictions for {fnames[idx]}", "-"*50, sep="\n")
+ for label, score in pred:
+ print(f"\t{label}:{score: >10}")
+ print("-"*50, "\n")
+ return predictions
+
diff --git a/imageai/inceptionv3/__init__.py b/imageai/inceptionv3/__init__.py
new file mode 100644
index 00000000..b86071a3
--- /dev/null
+++ b/imageai/inceptionv3/__init__.py
@@ -0,0 +1,154 @@
+import os, warnings
+from pathlib import Path
+from typing import List, Tuple
+
+import torch, torchvision
+import torch.nn.functional as F
+from torchvision import transforms
+from PIL import Image
+
+warnings.filterwarnings("once", category=ResourceWarning)
+
+class InceptionV3Pretrained:
+ """
+ An implementation that allows for easy classification of images
+ using the state of the art InceptionV3 computer vision model.
+ """
+ def __init__(self, label_path : str) -> None:
+ self.__model = torchvision.models.inception_v3(pretrained=False)
+ self.__classes = self.__load_classes(label_path)
+ self.__has_loaded_weights = False
+ self.__device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.__model_path = ""
+
+ def __load_classes(self, path : str) -> List[str]:
+ with open(path) as f:
+ unique_classes = [c.strip() for c in f.readlines()]
+ return unique_classes
+
+ def __load_image(self, image_path : str) -> Tuple[List[str], torch.Tensor]:
+ """
+ Loads image/images from the given path. If image_path is a directory, this
+ function only load the images in the directory (it does not visit the sub-
+ directories). This function also convert the loaded image/images to the
+ specification expected by the InceptionV3 architecture.
+ """
+ allowed_file_extensions = ["jpg", "jpeg", "png"]
+ images = []
+ fnames = []
+ preprocess = transforms.Compose([
+ transforms.Resize(299),
+ transforms.CenterCrop(299),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ ])
+ if os.path.isfile(image_path):
+ img = Image.open(image_path).convert("RGB")
+ images.append(preprocess(img))
+ fnames.append(os.path.basename(image_path))
+
+ elif os.path.isdir(image_path):
+ for file in os.listdir(image_path):
+ if os.path.isfile(os.path.join(image_path, file)) and\
+ file.rsplit('.')[-1].lower() in allowed_file_extensions:
+ img = Image.open(os.path.join(image_path, file)).convert("RGB")
+ images.append(preprocess(img))
+ fnames.append(file)
+ if images:
+ return fnames, torch.stack(images)
+ raise RuntimeError(
+ f"Error loading images from {os.path.abspath(image_path)}."
+ "\nEnsure the folder contains images,"
+ " allowed file extensions are .jpg, .jpeg, .png"
+ )
+
+ # properties
+ model_path = property(
+ fget=lambda self : self.__model_path,
+ fset=lambda self, path: self.set_model_path(path),
+ doc="Path containing the pretrained weight."
+ )
+
+ def set_model_path(self, path : str) -> None:
+ """
+ Sets the path to the pretrained weight.
+ """
+ if os.path.isfile(path):
+ self.__model_path = path
+ self.__has_loaded_weights = False
+ else:
+ raise ValueError(
+ "parameter path should be a path to the pretrained weight file."
+ )
+
+ def load_model(self) -> None:
+ """
+ Loads the InceptionV3 vision weights into the model architecture.
+ """
+ if not self.__has_loaded_weights:
+ try:
+ self.__model.load_state_dict(
+ torch.load(self.__model_path, map_location=self.__device)
+ )
+ self.__has_loaded_weights = True
+ self.__model.eval()
+ except Exception:
+ print("Weight loading failed.\nEnsure the model path is"
+ " set and the weight file is in the specified model path.")
+
+ def classify(self, image_path : str, top_n : int = 5, verbose : bool = True) -> List[List[Tuple[str, str]]]:
+ """
+ Classifies image/images according to the classes provided by imagenet.
+
+ Parameters:
+ -----------
+ image_path: a path to a single image or a path to a directory containing
+ images. If image_path is a path to a file, this functions
+ classifies the image according to the categories provided
+ by imagenet, else, if image_path is a path to a directory
+ that contains images, this function classifies all images in
+ the given directory (it doesn't visit the subdirectories).
+
+ top_n: number of top predictions to return.
+ verbose: if true, it prints the top_n predictions.
+ """
+ if not self.__has_loaded_weights:
+ if self.__model_path:
+ warnings.warn(
+ "Model path has changed but pretrained weights in the"
+ " new path are yet to be loaded.",
+ ResourceWarning
+ )
+ else:
+ warnings.warn(
+ "Model path isn't set, pretrained weights aren't used.",
+ ResourceWarning
+ )
+
+ fnames, images = self.__load_image(image_path)
+ images = images.to(self.__device)
+ # NOTE(review): leftover debug print of images.shape removed
+
+ with torch.no_grad():
+ output = self.__model(images)
+ probabilities = torch.softmax(output, dim=1)
+ top5_prob, top5_catid = torch.topk(probabilities, top_n)
+
+ with open(os.path.join(str(Path(__file__).resolve().parent.parent), "imagenet_classes.txt")) as f:
+ categories = [c.strip() for c in f.readlines()]
+ predictions = [
+ [
+ (categories[top5_catid[i][j]], f"{top5_prob[i][j].item()*100:.5f}%")
+ for j in range(top5_prob.shape[1])
+ ]
+ for i in range(top5_prob.shape[0])
+ ]
+
+ if verbose:
+ for idx, pred in enumerate(predictions):
+ print("-"*50, f"Top {top_n} predictions for {fnames[idx]}", "-"*50, sep="\n")
+ for label, score in pred:
+ print(f"\t{label}:{score: >10}")
+ print("-"*50, "\n")
+ return predictions
+
diff --git a/imageai/mobilenetv2/__init__.py b/imageai/mobilenetv2/__init__.py
new file mode 100644
index 00000000..98df5d51
--- /dev/null
+++ b/imageai/mobilenetv2/__init__.py
@@ -0,0 +1,150 @@
+import os, warnings
+from pathlib import Path
+from typing import List, Tuple
+
+import torch, torchvision
+import torch.nn.functional as F
+from torchvision import transforms
+from PIL import Image
+
+warnings.filterwarnings("once", category=ResourceWarning)
+
+class MobileNetV2Pretrained:
+ """
+ An implementation that allows for easy classification of images
+ using the state of the art MobileNet computer vision model.
+ """
+ def __init__(self, label_path : str) -> None:
+ self.__model = torchvision.models.mobilenet_v2(pretrained=False)
+ self.__classes = self.__load_classes(label_path)
+ self.__has_loaded_weights = False
+ self.__device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.__model_path = ""
+
+ def __load_classes(self, path : str) -> List[str]:
+ with open(path) as f:
+ unique_classes = [c.strip() for c in f.readlines()]
+ return unique_classes
+
+ def __load_image(self, image_path : str) -> Tuple[List[str], torch.Tensor]:
+ """
+ Loads image/images from the given path. If image_path is a directory, this
+ function only load the images in the directory (it does not visit the sub-
+ directories). This function also convert the loaded image/images to the
+ specification expected by the MobileNetV2 architecture.
+ """
+ allowed_file_extensions = ["jpg", "jpeg", "png"]
+ images = []
+ fnames = []
+ preprocess = transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ ])
+ if os.path.isfile(image_path):
+ img = Image.open(image_path).convert("RGB")
+ images.append(preprocess(img))
+ fnames.append(os.path.basename(image_path))
+
+ elif os.path.isdir(image_path):
+ for file in os.listdir(image_path):
+ if os.path.isfile(os.path.join(image_path, file)) and\
+ file.rsplit('.')[-1].lower() in allowed_file_extensions:
+ img = Image.open(os.path.join(image_path, file)).convert("RGB")
+ images.append(preprocess(img))
+ fnames.append(file)
+ if images:
+ return fnames, torch.stack(images)
+ raise RuntimeError(
+ f"Error loading images from {os.path.abspath(image_path)}."
+ "\nEnsure the folder contains images,"
+ " allowed file extensions are .jpg, .jpeg, .png"
+ )
+
+ # properties
+ model_path = property(
+ fget=lambda self : self.__model_path,
+ fset=lambda self, path: self.set_model_path(path),
+ doc="Path containing the pretrained weight."
+ )
+
+ def set_model_path(self, path : str) -> None:
+ """
+ Sets the path to the pretrained weight.
+ """
+ if os.path.isfile(path):
+ self.__model_path = path
+ self.__has_loaded_weights = False
+ else:
+ raise ValueError(
+ "parameter path should be a valid path to the pretrained weight file."
+ )
+
+ def load_model(self) -> None:
+ """
+ Loads the MobileNetV2 vision weights into the model architecture.
+ """
+ if not self.__has_loaded_weights:
+ try:
+ self.__model.load_state_dict(
+ torch.load(self.__model_path, map_location=self.__device)
+ )
+ self.__has_loaded_weights = True
+ self.__model.eval()
+ except Exception:
+ print("Weight loading failed.\nEnsure the model path is"
+ " set and the weight file is in the specified model path.")
+
+ def classify(self, image_path : str, top_n : int = 5, verbose : bool = True) -> List[List[Tuple[str, str]]]:
+ """
+ Classifies image/images according to the classes provided by imagenet.
+
+ Parameters:
+ -----------
+ image_path: a path to a single image or a path to a directory containing
+ images. If image_path is a path to a file, this functions
+ classifies the image according to the categories provided
+ by imagenet, else, if image_path is a path to a directory
+ that contains images, this function classifies all images in
+ the given directory (it doesn't visit the subdirectories).
+
+ top_n: number of top predictions to return.
+ verbose: if true, it prints the top_n predictions.
+ """
+ if not self.__has_loaded_weights:
+ if self.__model_path:
+ warnings.warn(
+ "Model path has changed but pretrained weights in the"
+ " new path are yet to be loaded.",
+ ResourceWarning
+ )
+ else:
+ warnings.warn(
+ "Model path isn't set, pretrained weights aren't used.",
+ ResourceWarning
+ )
+
+ fnames, images = self.__load_image(image_path)
+ images = images.to(self.__device)
+
+ with torch.no_grad():
+ output = self.__model(images)
+ probabilities = torch.softmax(output, dim=1)
+ top5_prob, top5_catid = torch.topk(probabilities, top_n)
+
+ predictions = [
+ [
+ (self.__classes[top5_catid[i][j]], f"{top5_prob[i][j].item()*100:.5f}%")
+ for j in range(top5_prob.shape[1])
+ ]
+ for i in range(top5_prob.shape[0])
+ ]
+
+ if verbose:
+ for idx, pred in enumerate(predictions):
+ print("-"*50, f"Top {top_n} predictions for {fnames[idx]}", "-"*50, sep="\n")
+ for label, score in pred:
+ print(f"\t{label}:{score: >10}")
+ print("-"*50, "\n")
+ return predictions
diff --git a/imageai/resnet50/__init__.py b/imageai/resnet50/__init__.py
new file mode 100644
index 00000000..2fd4b5fc
--- /dev/null
+++ b/imageai/resnet50/__init__.py
@@ -0,0 +1,149 @@
+import os, warnings
+from typing import List, Tuple
+
+import torch, torchvision
+import torch.nn.functional as F
+from torchvision import transforms
+from PIL import Image
+
+warnings.filterwarnings("once", category=ResourceWarning)
+
+class ResNet50Pretrained:
+ """
+ An implementation that allows for easy classification of images
+ using the state of the art ResNet50 computer vision model.
+ """
+ def __init__(self, label_path : str) -> None:
+ self.__model = torchvision.models.resnet50(pretrained=False)
+ self.__classes = self.__load_classes(label_path)
+ self.__has_loaded_weights = False
+ self.__device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.__model_path = ""
+
+ def __load_classes(self, path : str) -> List[str]:
+ with open(path) as f:
+ unique_classes = [c.strip() for c in f.readlines()]
+ return unique_classes
+
+ def __load_image(self, image_path : str) -> Tuple[List[str], torch.Tensor]:
+ """
+ Loads image/images from the given path. If image_path is a directory, this
+ function only load the images in the directory (it does not visit the sub-
+ directories). This function also convert the loaded image/images to the
+ specification expected by the ResNet50 architecture.
+ """
+ allowed_file_extensions = ["jpg", "jpeg", "png"]
+ images = []
+ fnames = []
+ preprocess = transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ ])
+ if os.path.isfile(image_path):
+ img = Image.open(image_path).convert("RGB")
+ images.append(preprocess(img))
+ fnames.append(os.path.basename(image_path))
+
+ elif os.path.isdir(image_path):
+ for file in os.listdir(image_path):
+ if os.path.isfile(os.path.join(image_path, file)) and\
+ file.rsplit('.')[-1].lower() in allowed_file_extensions:
+ img = Image.open(os.path.join(image_path, file)).convert("RGB")
+ images.append(preprocess(img))
+ fnames.append(file)
+ if images:
+ return fnames, torch.stack(images)
+ raise RuntimeError(
+ f"Error loading images from {os.path.abspath(image_path)}."
+ "\nEnsure the folder contains images,"
+ " allowed file extensions are .jpg, .jpeg, .png"
+ )
+
+ # properties
+ model_path = property(
+ fget=lambda self : self.__model_path,
+ fset=lambda self, path: self.set_model_path(path),
+ doc="Path containing the pretrained weight."
+ )
+
+ def set_model_path(self, path : str) -> None:
+ """
+ Sets the path to the pretrained weight.
+ """
+ if os.path.isfile(path):
+ self.__model_path = path
+ self.__has_loaded_weights = False
+ else:
+ raise ValueError(
+ "parameter path should be a path to the pretrained weight file."
+ )
+
+ def load_model(self) -> None:
+ """
+ Loads the ResNet50 vision weights into the model architecture.
+ """
+ if not self.__has_loaded_weights:
+ try:
+ self.__model.load_state_dict(
+ torch.load(self.__model_path, map_location=self.__device)
+ )
+ self.__has_loaded_weights = True
+ self.__model.eval()
+ except Exception:
+ print("Weight loading failed.\nEnsure the model path is"
+ " set and the weight file is in the specified model path.")
+
+ def classify(self, image_path : str, top_n : int = 5, verbose : bool = True) -> List[List[Tuple[str, str]]]:
+ """
+ Classifies image/images according to the classes provided by imagenet.
+
+ Parameters:
+ -----------
+ image_path: a path to a single image or a path to a directory containing
+ images. If image_path is a path to a file, this functions
+ classifies the image according to the categories provided
+ by imagenet, else, if image_path is a path to a directory
+ that contains images, this function classifies all images in
+ the given directory (it doesn't visit the subdirectories).
+
+ top_n: number of top predictions to return.
+ verbose: if true, it prints the top_n predictions.
+ """
+ if not self.__has_loaded_weights:
+ if self.__model_path:
+ warnings.warn(
+ "Model path has changed but pretrained weights in the"
+ " new path are yet to be loaded.",
+ ResourceWarning
+ )
+ else:
+ warnings.warn(
+ "Model path isn't set, pretrained weights aren't used.",
+ ResourceWarning
+ )
+
+ fnames, images = self.__load_image(image_path)
+ images = images.to(self.__device)
+
+ with torch.no_grad():
+ output = self.__model(images)
+ probabilities = torch.softmax(output, dim=1)
+ top5_prob, top5_catid = torch.topk(probabilities, top_n)
+
+ predictions = [
+ [
+ (self.__classes[top5_catid[i][j]], f"{top5_prob[i][j].item()*100:.5f}%")
+ for j in range(top5_prob.shape[1])
+ ]
+ for i in range(top5_prob.shape[0])
+ ]
+
+ if verbose:
+ for idx, pred in enumerate(predictions):
+ print("-"*50, f"Top {top_n} predictions for {fnames[idx]}", "-"*50, sep="\n")
+ for label, score in pred:
+ print(f"\t{label}:{score: >10}")
+ print("-"*50, "\n")
+ return predictions
diff --git a/imageai/Detection/keras_retinanet/models/__init__.py b/imageai/retinanet/__init__.py
similarity index 100%
rename from imageai/Detection/keras_retinanet/models/__init__.py
rename to imageai/retinanet/__init__.py
diff --git a/imageai/retinanet/utils.py b/imageai/retinanet/utils.py
new file mode 100644
index 00000000..1b23d7ce
--- /dev/null
+++ b/imageai/retinanet/utils.py
@@ -0,0 +1,289 @@
+
+from torchvision.io import ImageReadMode
+import torch
+from PIL import Image, ImageColor, ImageDraw, ImageFont
+from typing import List, Optional, Union, Tuple, BinaryIO
+import numpy as np
+import math
+import warnings
+import pathlib
+
+def read_file(path: str) -> torch.Tensor:
+ """
+ Reads and outputs the bytes contents of a file as a uint8 Tensor
+ with one dimension.
+
+ Args:
+ path (str): the path to the file to be read
+
+ Returns:
+ data (Tensor)
+ """
+ data = torch.ops.image.read_file(path)
+ return data
+
+def decode_image(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
+ """
+ Detects whether an image is a JPEG or PNG and performs the appropriate
+ operation to decode the image into a 3 dimensional RGB or grayscale Tensor.
+
+ Optionally converts the image to the desired format.
+ The values of the output tensor are uint8 in [0, 255].
+
+ Args:
+ input (Tensor): a one dimensional uint8 tensor containing the raw bytes of the
+ PNG or JPEG image.
+ mode (ImageReadMode): the read mode used for optionally converting the image.
+ Default: ``ImageReadMode.UNCHANGED``.
+ See ``ImageReadMode`` class for more information on various
+ available modes.
+
+ Returns:
+ output (Tensor[image_channels, image_height, image_width])
+ """
+ output = torch.ops.image.decode_image(input, mode.value)
+ return output
+
+def read_image(path: str, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
+ """
+ Reads a JPEG or PNG image into a 3 dimensional RGB or grayscale Tensor.
+ Optionally converts the image to the desired format.
+ The values of the output tensor are uint8 in [0, 255].
+
+ Args:
+ path (str): path of the JPEG or PNG image.
+ mode (ImageReadMode): the read mode used for optionally converting the image.
+ Default: ``ImageReadMode.UNCHANGED``.
+ See ``ImageReadMode`` class for more information on various
+ available modes.
+
+ Returns:
+ output (Tensor[image_channels, image_height, image_width])
+ """
+
+ data = read_file(path)
+ return decode_image(data, mode)
+
+def _generate_color_palette(num_objects: int):
+ palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
+ return [tuple((i * palette) % 255) for i in range(num_objects)]
+
+@torch.no_grad()
+def make_grid(
+ tensor: Union[torch.Tensor, List[torch.Tensor]],
+ nrow: int = 8,
+ padding: int = 2,
+ normalize: bool = False,
+ value_range: Optional[Tuple[int, int]] = None,
+ scale_each: bool = False,
+ pad_value: float = 0.0,
+ **kwargs,
+) -> torch.Tensor:
+ """
+ Make a grid of images.
+
+ Args:
+ tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
+ or a list of images all of the same size.
+ nrow (int, optional): Number of images displayed in each row of the grid.
+ The final grid size is ``(B / nrow, nrow)``. Default: ``8``.
+ padding (int, optional): amount of padding. Default: ``2``.
+ normalize (bool, optional): If True, shift the image to the range (0, 1),
+ by the min and max values specified by ``value_range``. Default: ``False``.
+ value_range (tuple, optional): tuple (min, max) where min and max are numbers,
+ then these numbers are used to normalize the image. By default, min and max
+ are computed from the tensor.
+ range (tuple. optional):
+ .. warning::
+ This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``value_range``
+ instead.
+ scale_each (bool, optional): If ``True``, scale each image in the batch of
+ images separately rather than the (min, max) over all images. Default: ``False``.
+ pad_value (float, optional): Value for the padded pixels. Default: ``0``.
+
+ Returns:
+ grid (Tensor): the tensor containing grid of images.
+ """
+ if not (torch.is_tensor(tensor) or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))):
+ raise TypeError(f"tensor or list of tensors expected, got {type(tensor)}")
+
+ if "range" in kwargs.keys():
+ warnings.warn(
+ "The parameter 'range' is deprecated since 0.12 and will be removed in 0.14. "
+ "Please use 'value_range' instead."
+ )
+ value_range = kwargs["range"]
+
+ # if list of tensors, convert to a 4D mini-batch Tensor
+ if isinstance(tensor, list):
+ tensor = torch.stack(tensor, dim=0)
+
+ if tensor.dim() == 2: # single image H x W
+ tensor = tensor.unsqueeze(0)
+ if tensor.dim() == 3: # single image
+ if tensor.size(0) == 1: # if single-channel, convert to 3-channel
+ tensor = torch.cat((tensor, tensor, tensor), 0)
+ tensor = tensor.unsqueeze(0)
+
+ if tensor.dim() == 4 and tensor.size(1) == 1: # single-channel images
+ tensor = torch.cat((tensor, tensor, tensor), 1)
+
+ if normalize is True:
+ tensor = tensor.clone() # avoid modifying tensor in-place
+ if value_range is not None:
+ assert isinstance(
+ value_range, tuple
+ ), "value_range has to be a tuple (min, max) if specified. min and max are numbers"
+
+ def norm_ip(img, low, high):
+ img.clamp_(min=low, max=high)
+ img.sub_(low).div_(max(high - low, 1e-5))
+
+ def norm_range(t, value_range):
+ if value_range is not None:
+ norm_ip(t, value_range[0], value_range[1])
+ else:
+ norm_ip(t, float(t.min()), float(t.max()))
+
+ if scale_each is True:
+ for t in tensor: # loop over mini-batch dimension
+ norm_range(t, value_range)
+ else:
+ norm_range(tensor, value_range)
+
+ assert isinstance(tensor, torch.Tensor)
+ if tensor.size(0) == 1:
+ return tensor.squeeze(0)
+
+ # make the mini-batch of images into a grid
+ nmaps = tensor.size(0)
+ xmaps = min(nrow, nmaps)
+ ymaps = int(math.ceil(float(nmaps) / xmaps))
+ height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
+ num_channels = tensor.size(1)
+ grid = tensor.new_full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value)
+ k = 0
+ for y in range(ymaps):
+ for x in range(xmaps):
+ if k >= nmaps:
+ break
+ # Tensor.copy_() is a valid method but seems to be missing from the stubs
+ # https://pytorch.org/docs/stable/tensors.html#torch.Tensor.copy_
+ grid.narrow(1, y * height + padding, height - padding).narrow( # type: ignore[attr-defined]
+ 2, x * width + padding, width - padding
+ ).copy_(tensor[k])
+ k = k + 1
+ return grid
+
+
+@torch.no_grad()
+def draw_bounding_boxes_and_labels(
+ image: torch.Tensor,
+ boxes: torch.Tensor,
+ draw_boxes: bool,
+ labels: Optional[List[str]] = None,
+ label_color: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None,
+ box_color: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None,
+ fill: Optional[bool] = False,
+ width: int = 1,
+ font: Optional[str] = None,
+ font_size: int = 10,
+) -> torch.Tensor:
+
+ """
+ Draws bounding boxes on given image.
+ The values of the input image should be uint8 between 0 and 255.
+ If fill is True, Resulting Tensor should be saved as PNG image.
+
+ Args:
+ image (Tensor): Tensor of shape (C x H x W) and dtype uint8.
+ boxes (Tensor): Tensor of size (N, 4) containing bounding boxes in (xmin, ymin, xmax, ymax) format. Note that
+ the boxes are absolute coordinates with respect to the image. In other words: `0 <= xmin < xmax < W` and
+ `0 <= ymin < ymax < H`.
+ labels (List[str]): List containing the labels of bounding boxes.
+ colors (color or list of colors, optional): List containing the colors
+ of the boxes or single color for all boxes. The color can be represented as
+ PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``.
+ By default, random colors are generated for boxes.
+ fill (bool): If `True` fills the bounding box with specified color.
+ width (int): Width of bounding box.
+ font (str): A filename containing a TrueType font. If the file is not found in this filename, the loader may
+ also search in other directories, such as the `fonts/` directory on Windows or `/Library/Fonts/`,
+ `/System/Library/Fonts/` and `~/Library/Fonts/` on macOS.
+ font_size (int): The requested font size in points.
+
+ Returns:
+ img (Tensor[C, H, W]): Image Tensor of dtype uint8 with bounding boxes plotted.
+ """
+
+ if not isinstance(image, torch.Tensor):
+ raise TypeError(f"Tensor expected, got {type(image)}")
+ elif image.dtype != torch.uint8:
+ raise ValueError(f"Tensor uint8 expected, got {image.dtype}")
+ elif image.dim() != 3:
+ raise ValueError("Pass individual images, not batches")
+ elif image.size(0) not in {1, 3}:
+ raise ValueError("Only grayscale and RGB images are supported")
+
+ num_boxes = boxes.shape[0]
+
+ if labels is None:
+ labels: Union[List[str], List[None]] = [None] * num_boxes # type: ignore[no-redef]
+ elif len(labels) != num_boxes:
+ raise ValueError(
+ f"Number of boxes ({num_boxes}) and labels ({len(labels)}) mismatch. Please specify labels for each box."
+ )
+
+
+ # Handle Grayscale images
+ if image.size(0) == 1:
+ image = torch.tile(image, (3, 1, 1))
+
+ ndarr = image.permute(1, 2, 0).cpu().numpy()
+ img_to_draw = Image.fromarray(ndarr)
+ img_boxes = boxes.to(torch.int64).tolist()
+
+ if fill:
+ draw = ImageDraw.Draw(img_to_draw, "RGBA")
+ else:
+ draw = ImageDraw.Draw(img_to_draw)
+
+ txt_font = ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size)
+
+ for bbox, label in zip(img_boxes, labels):
+ if draw_boxes:
+ if fill:
+ fill_color = label_color + (100,)
+ draw.rectangle(bbox, width=width, outline=label_color, fill=fill_color)
+ else:
+ draw.rectangle(bbox, width=width, outline=box_color)
+
+ if label is not None:
+ margin = width + 1
+ draw.text((bbox[0] + margin, bbox[1] + margin), label, fill=label_color, font=txt_font)
+
+ return torch.from_numpy(np.array(img_to_draw)).permute(2, 0, 1).to(dtype=torch.uint8)
+
+
+@torch.no_grad()
+def tensor_to_ndarray(
+ tensor: Union[torch.Tensor, List[torch.Tensor]],
+ **kwargs,
+) -> np.ndarray:
+ """
+ Convert a Tensor into ndarray and return the array
+
+ Args:
+ tensor (Tensor or list): Image to be saved. If given a mini-batch tensor,
+ saves the tensor as a grid of images by calling ``make_grid``.
+ fp (string or file object): A filename or a file object
+ format(Optional): If omitted, the format to use is determined from the filename extension.
+ If a file object was used instead of a filename, this parameter should always be used.
+ **kwargs: Other arguments are documented in ``make_grid``.
+ """
+
+ grid = make_grid(tensor, **kwargs)
+ # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer
+ ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to("cpu", torch.uint8).numpy()
+
+ return ndarr
diff --git a/imageai/Detection/keras_retinanet/preprocessing/__init__.py b/imageai/yolov3/__init__.py
similarity index 100%
rename from imageai/Detection/keras_retinanet/preprocessing/__init__.py
rename to imageai/yolov3/__init__.py
diff --git a/imageai/yolov3/tiny_yolov3.py b/imageai/yolov3/tiny_yolov3.py
new file mode 100644
index 00000000..ef0617a5
--- /dev/null
+++ b/imageai/yolov3/tiny_yolov3.py
@@ -0,0 +1,90 @@
+from typing import Union, List, Tuple, Optional
+
+import torch
+import torch.nn as nn
+import numpy as np
+
+from .yolov3 import DetectionLayer, ConvLayer
+
+
+class YoloV3Tiny(nn.Module):
+    """
+    Tiny YOLOv3 object detector: the lightweight Darknet variant with two
+    detection (YOLO) layers instead of three.
+
+    Parameters:
+    -----------
+    anchors: flat sequence of anchor box dimensions (w0, h0, w1, h1, ...);
+        each detection layer selects three anchors via its anchor mask.
+    num_classes: number of object classes (80 for COCO).
+    device: device string ("cpu" or "cuda") forwarded to the detection layers.
+    """
+
+    def __init__(
+        self,
+        anchors : Union[List[int], Tuple[int,...]],
+        num_classes : int=80,
+        device : str="cpu"
+    ):
+        super().__init__()
+
+        # Network Layers
+        self.conv1 = ConvLayer(3, 16)
+        self.maxpool1 = nn.MaxPool2d(2, 2)
+        self.conv2 = ConvLayer(16, 32)
+        self.maxpool2 = nn.MaxPool2d(2, 2)
+        self.conv3 = ConvLayer(32, 64)
+        self.maxpool3 = nn.MaxPool2d(2, 2)
+        self.conv4 = ConvLayer(64, 128)
+        self.maxpool4 = nn.MaxPool2d(2, 2)
+        self.conv5 = ConvLayer(128, 256)
+        self.maxpool5 = nn.MaxPool2d(2, 2)
+        self.conv6 = ConvLayer(256, 512)
+        # zero-pad + stride-1 max-pool keeps the spatial size unchanged
+        self.zeropad = nn.ZeroPad2d((0, 1, 0, 1))
+        self.maxpool6 = nn.MaxPool2d(2, 1)
+        self.conv7 = ConvLayer(512, 1024)
+        self.conv8 = ConvLayer(1024, 256, 1, 1)
+        self.conv9 = ConvLayer(256, 512)
+        # 1x1 conv producing the raw detection map: 3 anchors x (5 + classes)
+        self.conv10 = ConvLayer(
+            512, (3 * (5+num_classes)), 1, 1,
+            use_batch_norm=False,
+            activation="linear"
+        )
+        self.yolo1 = DetectionLayer(
+            num_classes=num_classes, anchors=anchors,
+            anchor_masks=(3, 4, 5), device=device, layer=1
+        )
+        # self.__route_layer(conv8)
+        self.conv11 = ConvLayer(256, 128, 1, 1)
+        self.upsample1 = nn.Upsample(
+            scale_factor=2, mode="nearest"
+            #align_corners=True
+        )
+        # self.__route_layer(upsample1, conv5)
+        self.conv12 = ConvLayer(384, 256)
+        self.conv13 = ConvLayer(
+            256, (3 * (5 + num_classes)), 1, 1,
+            use_batch_norm=False,
+            activation="linear"
+        )
+        self.yolo2 = DetectionLayer(
+            num_classes=num_classes, anchors=anchors,
+            anchor_masks=(0, 1, 2), device=device, layer=2
+        )
+
+    def get_loss_layers(self) -> List[nn.Module]:
+        # The two DetectionLayer modules used for loss computation.
+        return [self.yolo1, self.yolo2]
+
+    def __route_layer(self, y1 : torch.Tensor, y2 : Optional[torch.Tensor]=None) -> torch.Tensor:
+        # Darknet "route" layer: concatenate along the channel dimension when
+        # two inputs are given, otherwise pass the single input through.
+        if isinstance(y2, torch.Tensor):
+            return torch.cat([y1, y2], 1)
+        return y1
+
+    def forward(self, x : torch.Tensor) -> torch.Tensor:
+        # Backbone: alternating conv + stride-2 max-pool downsampling.
+        y = self.maxpool2(self.conv2(self.maxpool1(self.conv1(x))))
+        y = self.maxpool4(self.conv4(self.maxpool3(self.conv3(y))))
+        r1 = self.conv5(y) # route layer
+        y = self.zeropad(self.conv6(self.maxpool5(r1)))
+        y = self.conv7(self.maxpool6(y))
+        r2 = self.conv8(y) # route layer
+        y = self.conv10(self.conv9(r2))
+
+        # first detection layer (coarse grid)
+        out = self.yolo1(y)
+        y = self.conv11(self.__route_layer(r2))
+        # upsample and concatenate with the earlier backbone feature map
+        y = self.__route_layer(self.upsample1(y), r1)
+        y = self.conv13(self.conv12(y))
+
+        # second detection layer (finer grid)
+        out = torch.cat([out, self.yolo2(y)], 1)
+
+        return out
diff --git a/imageai/yolov3/utils.py b/imageai/yolov3/utils.py
new file mode 100644
index 00000000..aedb0129
--- /dev/null
+++ b/imageai/yolov3/utils.py
@@ -0,0 +1,212 @@
+import math
+from typing import Union, List, Tuple
+
+import torch
+import numpy as np
+import cv2 as cv
+from torchvision.ops import batched_nms
+
+
+def draw_bbox_and_label(x : torch.Tensor, label : str, img : np.ndarray) -> np.ndarray:
+ """
+ Draws the predicted bounding boxes on the original image.
+ """
+ x1,y1,x2,y2 = tuple(map(int, x))
+ if x is not None:
+ img = cv.rectangle(img, (x1,y1), (x2,y2), (0, 255, 0), 1)
+ t_size = cv.getTextSize(label, cv.FONT_HERSHEY_PLAIN, 1, 1)[0]
+ c2 = (x1 + t_size[0] + 3, y1 + t_size[1] + 4)
+ img = cv.putText(img, label, (x1, y1+t_size[1]+4), cv.FONT_HERSHEY_PLAIN, 1, (0,0,255), 1)
+
+ return img
+
+def letterbox_image(
+ image : np.ndarray,
+ inp_dim : Tuple[int, int]) -> np.ndarray:
+ """
+ Resizes images into the dimension expected by the network. This
+ function fills extra spaces in the image with grayscale, if the
+ image is smaller than the expected dimesion. This implementation
+ keeps the aspect ration of the original image.
+ """
+ img_w, img_h = image.shape[1], image.shape[0] # original image dimension
+ net_w, net_h = inp_dim # the dimension expected by the network.
+
+ # calculate the new dimension with same aspect ration as
+ # the original image.
+ scale_factor = min(net_w/img_w, net_h/img_h)
+ new_w = int(round(img_w * scale_factor))
+ new_h = int(round(img_h * scale_factor))
+
+ resized_image = cv.resize(image, (new_w, new_h), interpolation=cv.INTER_CUBIC)
+ canvas = np.full((net_w, net_h, 3), 128)
+ canvas[(net_h - new_h)//2 : (net_h - new_h)//2 + new_h, (net_w - new_w)//2 : (net_w - new_w)//2 + new_w, :] = resized_image
+ return canvas
+
+def prepare_image(
+ image : np.ndarray,
+ inp_dim : Tuple[int, int]) -> torch.Tensor:
+ """
+ Prepared the input to match the expectation of the network.
+ """
+ img = letterbox_image(image, inp_dim)
+ img = img[:, :, ::-1].transpose((2, 0, 1)).copy()
+ img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
+ return img
+
+def bbox_iou(bbox1 : torch.Tensor, bbox2 : torch.Tensor, device="cpu"):
+ """
+ Returns the IoU value of overlapping boxes
+ """
+ b1_x1, b1_y1, b1_x2, b1_y2 = bbox1[:, 0], bbox1[:, 1], bbox1[:, 2], bbox1[:, 3]
+ b2_x1, b2_y1, b2_x2, b2_y2 = bbox2[:, 0], bbox2[:, 1], bbox2[:, 2], bbox2[:, 3]
+
+ # intersections
+ inter_rect_x1 = torch.max(b1_x1, b2_x1)
+ inter_rect_y1 = torch.max(b1_y1, b2_y1)
+ inter_rect_x2 = torch.min(b1_x2, b2_x2)
+ inter_rect_y2 = torch.min(b1_y2, b2_y2)
+ inter_area = torch.max(inter_rect_x2 - inter_rect_x1+1, torch.zeros(inter_rect_x2.shape, device=device)) * \
+ torch.max(inter_rect_y2 - inter_rect_y1+1, torch.zeros(inter_rect_y2.shape, device=device))
+
+ b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
+ b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
+
+ return inter_area / (b1_area + b2_area - inter_area)
+
+def transform_prediction(
+ pred : torch.Tensor,
+ inp_dim : int,
+ anchors : Union[List[int], Tuple[int, ...], torch.Tensor],
+ num_classes : int,
+ device : str = "cpu"
+ ) -> torch.Tensor:
+ """
+ Transforms the predictions of the convolutional layers
+ from
+ batch_size x (3 * 5+num_classes) x grid_size x grid_size
+ to
+ batch_size x (grid_size * grid_size * anchors) x num_classes
+ aids the concatenation of the prediction at the three detection layers
+ and also for easy representation of the predicted bounding boxes.
+
+ Also, transforms the bounding box predictions and the objectness score
+ to match the discription specified in the paper:
+ Bx = sigmoid(Tx) + Cx
+ By = sigmoid(Ty) + Cy
+ Bw = Pw(exp(Tw))
+ Bh = Ph(exp(Th))
+
+ Parameters:
+ -----------
+ pred: prediction of the convolutional layer
+ inp_dim: the dimension of images expected by the yolo neural network
+ anchors: a list of anchors
+ num_classes: the numbers of unique classes as specified by COCO.
+
+ Returns:
+ --------
+ the transformed input.
+ """
+ batch_size = pred.shape[0]
+ grid_size = pred.shape[2]
+ stride = inp_dim // grid_size
+ bbox_attrs = 5 + num_classes
+ num_anchors = len(anchors)
+
+ # transform input shape
+ pred = pred.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
+ pred = pred.transpose(1, 2).contiguous()
+ pred = pred.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
+
+ # since the dimensions of the anchors are in accordance with the original
+ # dimension of the image, it's required to scale the dimension of the
+ # anchors to match the dimension of the output of the convolutional
+ # layer
+ anchors = [(a[0] / stride, a[1] / stride) for a in anchors]
+
+ # sigmoid the center_x, center_y and the objectness score
+ pred[:, :, 0] = torch.sigmoid(pred[:, :, 0])
+ pred[:, :, 1] = torch.sigmoid(pred[:, :, 1])
+ pred[:, :, 4] = torch.sigmoid(pred[:, :, 4])
+
+ # add the center offsets
+ grid = torch.arange(grid_size, dtype=torch.float)
+ grid = np.arange(grid_size)
+ x_o, y_o = np.meshgrid(grid, grid)
+ #x_offset, y_offset = torch.meshgrid(grid, grid)
+
+ x_offset = torch.FloatTensor(x_o).view(-1, 1).to(device)
+ y_offset = torch.FloatTensor(y_o).view(-1, 1).to(device)
+ #x_offset = x_offset.transpose(0,1).reshape(-1,1).to(device)
+ #y_offset = y_offset.transpose(0,1).reshape(-1,1).to(device)
+
+ x_y_offset = torch.cat([x_offset, y_offset], dim=1).repeat(1, num_anchors).view(-1,2).unsqueeze(0)
+ pred[:, :, :2] += x_y_offset
+
+ # transform height and width
+ anchors = torch.FloatTensor(anchors).to(device)
+ anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
+ pred[:, :, 2:4] = torch.exp(pred[:, :, 2:4])*anchors
+
+ # apply sigmoid to class scores
+ pred[:, :, 5:5+num_classes] = torch.sigmoid(pred[:, :, 5:5+num_classes])
+
+ # resize bounding box prediction to the original image dimension
+ pred[:, :, :4] *= stride
+
+ return pred
+
+def get_predictions(
+    pred : torch.Tensor,
+    num_classes : int,
+    objectness_confidence : float = 0.5,
+    nms_confidence_level : float = 0.4,
+    device : str = "cpu") -> Union[torch.Tensor, None]:
+    """
+    This function filters the bounding boxes predicted by the network by first
+    discarding bounding boxes that have a low objectness score, and then
+    proceeds to filter overlapping bounding boxes using the non-maximum
+    suppression algorithm (torchvision's ``batched_nms``).
+
+    Parameters:
+    -----------
+    pred: a tensor (predicted output) of shape
+        'batch_size x num_bboxes x bbox_attrs'
+    num_classes: the number of unique classes as provided by COCO.
+    objectness_confidence: probability threshold for bounding boxes
+        containing a valid object.
+    nms_confidence_level: IoU threshold for overlapping bounding boxes.
+    device: device string the index/score tensors are moved to.
+
+    Returns:
+    --------
+    A tensor of surviving detections with columns
+    (batch_idx, x1, y1, x2, y2, objectness, class_score, class_idx),
+    or None when no box survives NMS.
+    """
+    nB = pred.shape[0] # number of batches
+    bbox_attr = pred.shape[2] # center_x, center_y, height, width, class_probabilites
+    nBBOX = pred.shape[1] # number of bounding boxes
+    # zero out every prediction whose objectness score is below threshold
+    conf_mask = (pred[:, :, 4] > objectness_confidence).float().unsqueeze(2)
+    pred = pred * conf_mask
+
+    # transform the predicted centers, height and width to top-left corner and
+    # right bottom corner coordinates to aid the ease computation of the IoU
+    bbox_corner = pred.new(pred.shape)
+    bbox_corner[:, :, 0] = (pred[:, :, 0] - (pred[:, :, 2] / 2)) # top-left_x
+    bbox_corner[:, :, 1] = (pred[:, :, 1] - (pred[:, :, 3] / 2)) # top-left_y
+    bbox_corner[:, :, 2] = (pred[:, :, 0] + (pred[:, :, 2] / 2)) # bottom_right_x
+    bbox_corner[:, :, 3] = (pred[:, :, 1] + (pred[:, :, 3] / 2)) # bottom_right_y
+    pred[:, :, :4] = bbox_corner[:, :, :4]
+
+    # flatten the batch dimension; keep track of which image each row
+    # came from so batched_nms never suppresses across images
+    n_pred = pred.view(-1, bbox_attr)
+    idxs = torch.arange(nB).reshape(-1,1).repeat(1, nBBOX).view(-1).to(device) # image indices
+
+    max_conf, max_idx = torch.max(n_pred[:, 5:5+num_classes], 1) # maximum class score and the index
+    max_conf = max_conf.float().unsqueeze(1).to(device)
+    max_idx = max_idx.float().unsqueeze(1).to(device)
+    n_pred = torch.cat([idxs.unsqueeze(1), n_pred[:, :5], max_conf, max_idx], 1) # batch_idx, x1, y1, x2, y2, objectness_score, class_score, class_idx
+
+    # NMS keyed on the class index, scored by the objectness score
+    valid_bbox_indices = batched_nms(n_pred[:, 1:5].clone(), n_pred[:, 5].clone(), n_pred[:, 7].clone(), nms_confidence_level)
+
+    if len(valid_bbox_indices):
+        return n_pred[valid_bbox_indices, :]
+    return None
diff --git a/imageai/yolov3/yolov3.py b/imageai/yolov3/yolov3.py
new file mode 100644
index 00000000..d1dc112c
--- /dev/null
+++ b/imageai/yolov3/yolov3.py
@@ -0,0 +1,350 @@
+from typing import Union, List, Tuple, Optional
+
+import torch
+import torch.nn as nn
+import numpy as np
+
+from .utils import transform_prediction
+
+
+def noop(x):
+    """Identity function; used in place of an optional norm/activation layer."""
+    return x
+
+class DetectionLayer(nn.Module):
+    """
+    YOLO detection ("yolo") layer. At inference time it decodes the raw
+    convolutional output into bounding-box predictions via
+    ``transform_prediction``; at training time it only reshapes the raw
+    output and caches tensors for the loss computation.
+
+    Parameters:
+    -----------
+    anchors: flat sequence of anchor dimensions (w0, h0, w1, h1, ...).
+    anchor_masks: indices of the three anchors this layer uses.
+    layer: 1-based index of this detection layer within the network.
+    num_classes: number of object classes (80 for COCO).
+    device: device string the anchor tensor is placed on.
+    """
+
+    def __init__(
+        self,
+        anchors : Union[List[int], Tuple[int, ...]],
+        anchor_masks : Tuple[int, int, int],
+        layer : int,
+        num_classes : int=80,
+        device : str="cpu"
+    ):
+        super().__init__()
+        # Network input resolution assumed by the stride computation below.
+        self.height = 416
+        self.width = 416
+        self.num_classes = num_classes
+        # Loss-related thresholds.
+        self.ignore_thresh = 0.7
+        self.truth_thresh = 1
+        self.rescore = 1
+        self.device = device
+        self.anchors = self.__get_anchors(anchors, anchor_masks)
+        self.layer = layer
+        # Populated on each forward pass.
+        self.layer_width = None
+        self.layer_height = None
+        self.layer_output = None
+        self.pred = None
+        self.stride = None
+        self.grid = None
+        self.anchor_grid = None
+
+    def __get_anchors(
+        self, anchors : Union[List[int], Tuple[int, ...]],
+        anchor_masks : Tuple[int, int, int]
+    ) -> torch.Tensor:
+        # Pair the flat (w, h, w, h, ...) sequence and keep only the
+        # anchors selected by this layer's mask.
+        a = [(anchors[i], anchors[i+1]) for i in range(0, len(anchors), 2)]
+        return torch.tensor([a[i] for i in anchor_masks]).to(self.device)
+
+    def forward(self, x : torch.Tensor):
+        self.layer_height, self.layer_width = x.shape[2], x.shape[3]
+        self.stride = self.height // self.layer_height
+        if self.training:
+            batch_size = x.shape[0]
+            grid_size = x.shape[2]
+            bbox_attrs = 5 + self.num_classes
+            num_anchors = len(self.anchors)
+
+            # transform input shape: keep a (B, A, G, G, attrs) view for the
+            # loss in self.pred and a detached, flattened (B, G*G*A, attrs)
+            # copy in self.layer_output
+            self.layer_output = x.detach()
+            self.pred = x.view(batch_size, num_anchors, bbox_attrs, grid_size, grid_size).permute(0, 1, 3, 4, 2).contiguous()
+
+            self.layer_output = self.layer_output.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
+            self.layer_output = self.layer_output.transpose(1, 2).contiguous()
+            self.layer_output = self.layer_output.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
+
+        else:
+            # transform the output of the network and scale it to match the
+            # network dimension : 416x416
+            self.layer_output = transform_prediction(
+                x.data, self.width, self.anchors, self.num_classes,
+                self.device
+            )
+        return self.layer_output
+
+
+class ConvLayer(nn.Module):
+
+ def __init__(self, in_f : int, out_f : int, kernel_size : int = 3,
+ stride : int = 1, use_batch_norm : bool = True,
+ activation : str ="leaky"):
+ super().__init__()
+ self.conv = nn.Conv2d(
+ in_f, out_f, stride=stride, kernel_size=kernel_size,
+ padding= kernel_size//2,
+ bias=False if use_batch_norm else True
+ )
+ self.batch_norm = nn.BatchNorm2d(out_f) if use_batch_norm else noop
+ self.leaky_relu = nn.LeakyReLU(0.1, inplace=True) if activation=="leaky" else noop
+
+ def forward(self, x : torch.Tensor):
+ return self.leaky_relu(self.batch_norm(self.conv(x)))
+
+class YoloV3(nn.Module):
+    """
+    Full YOLOv3 object detector: a Darknet backbone with residual
+    ("shortcut") connections followed by three detection layers that
+    predict boxes at three feature-map scales.
+
+    Parameters:
+    -----------
+    anchors: flat sequence of anchor box dimensions (w0, h0, w1, h1, ...);
+        each detection layer selects three anchors via its anchor mask.
+    num_classes: number of object classes (80 for COCO).
+    device: device string ("cpu" or "cuda") forwarded to the detection layers.
+    """
+
+    def __init__(
+        self,
+        anchors : Union[List[int], Tuple[int, ...]],
+        num_classes : int = 80,
+        device : str ="cpu"):
+        super().__init__()
+
+        # Network Layers
+        self.conv1 = ConvLayer(3, 32)
+        self.conv2 = ConvLayer(32, 64, stride=2)
+        self.conv3 = ConvLayer(64, 32, 1, 1)
+        self.conv4 = ConvLayer(32, 64)
+        # self.__shortcut_layer1(self.conv4, self.conv2)
+        self.conv5 = ConvLayer(64, 128, stride=2)
+        self.conv6 = ConvLayer(128, 64, 1, 1)
+        self.conv7 = ConvLayer(64, 128, stride=1)
+        # self.__shortcut_layer2(self.conv7, self.conv5)
+        self.conv8 = ConvLayer(128, 64, 1, 1)
+        self.conv9 = ConvLayer(64, 128, stride=1)
+        # self.__shortcut_layer3(self.conv9, shortcut2)
+        self.conv10 = ConvLayer(128, 256, stride=2)
+        self.conv11 = ConvLayer(256, 128, 1, 1)
+        self.conv12 = ConvLayer(128, 256)
+        # self.__shortcut_layer4(self.con12, self.conv10)
+        self.conv13 = ConvLayer(256, 128, 1, 1)
+        self.conv14 = ConvLayer(128, 256)
+        # self.__shortcut_layer5(self.conv14, shortcut4)
+        self.conv15 = ConvLayer(256, 128, 1, 1)
+        self.conv16 = ConvLayer(128, 256)
+        # self.__shortcut_layer6(self.conv16, shortcut5)
+        self.conv17 = ConvLayer(256, 128, 1, 1)
+        self.conv18 = ConvLayer(128, 256)
+        # self.__shortcut_layer7(self.conv18, shortcut6)
+        self.conv19 = ConvLayer(256, 128, 1, 1)
+        self.conv20 = ConvLayer(128, 256)
+        # self.__shortcut_layer8(self.conv20, shortcut7)
+        self.conv21 = ConvLayer(256, 128, 1, 1)
+        self.conv22 = ConvLayer(128, 256)
+        # self.__shortcut_layer9(self.conv22, shortcut8)
+        self.conv23 = ConvLayer(256, 128, 1, 1)
+        self.conv24 = ConvLayer(128, 256)
+        # self.__shortcut_layer10(self.conv24, shortcut9)
+        self.conv25 = ConvLayer(256, 128, 1, 1)
+        self.conv26 = ConvLayer(128, 256)
+        # self.__shortcut_layer11(self.conv26, shortcut10)
+        self.conv27 = ConvLayer(256, 512, stride=2)
+        self.conv28 = ConvLayer(512, 256, 1, 1)
+        self.conv29 = ConvLayer(256, 512)
+        # self.__shortcut_layer12(self.conv29, self.conv27)
+        self.conv30 = ConvLayer(512, 256, 1, 1)
+        self.conv31 = ConvLayer(256, 512)
+        # self.__shortcut_layer13(self.conv31, shortcut12)
+        self.conv32 = ConvLayer(512, 256, 1, 1)
+        self.conv33 = ConvLayer(256, 512)
+        # self.__shortcut_layer14(self.conv33, shortcut13)
+        self.conv34 = ConvLayer(512, 256, 1, 1)
+        self.conv35 = ConvLayer(256, 512)
+        # self.__shortcut_layer15(self.conv35, shortcut14)
+        self.conv36 = ConvLayer(512, 256, 1, 1)
+        self.conv37 = ConvLayer(256, 512)
+        # self.__shortcut_layer16(self.conv37, shortcut15)
+        self.conv38 = ConvLayer(512, 256, 1, 1)
+        self.conv39 = ConvLayer(256, 512)
+        # self.__shortcut_layer17(self.conv39, shortcut16)
+        self.conv40 = ConvLayer(512, 256, 1, 1)
+        self.conv41 = ConvLayer(256, 512)
+        # self.__shortcut_layer18(self.conv41, shortcut17)
+        self.conv42 = ConvLayer(512, 256, 1, 1)
+        self.conv43 = ConvLayer(256, 512)
+        # self.__shortcut_layer19(self.conv43, shortcut18)
+        self.conv44 = ConvLayer(512, 1024, stride=2)
+        self.conv45 = ConvLayer(1024, 512, 1, 1)
+        self.conv46 = ConvLayer(512, 1024)
+        # self.__shortcut_layer20(self.conv46, self.conv44)
+        self.conv47 = ConvLayer(1024, 512, 1, 1)
+        self.conv48 = ConvLayer(512, 1024)
+        # self.__shortcut_layer21(self.conv48, shortcut20)
+        self.conv49 = ConvLayer(1024, 512, 1, 1)
+        self.conv50 = ConvLayer(512, 1024)
+        # self.__shortcut_layer22(self.conv50, shortcut21)
+        self.conv51 = ConvLayer(1024, 512, 1, 1)
+        self.conv52 = ConvLayer(512, 1024)
+        # self.__shortcut_layer23(self.conv52, shortcut22)
+        self.conv53 = ConvLayer(1024, 512, 1, 1)
+        self.conv54 = ConvLayer(512, 1024)
+        self.conv55 = ConvLayer(1024, 512, 1, 1)
+        self.conv56 = ConvLayer(512, 1024)
+        self.conv57 = ConvLayer(1024, 512, 1, 1)
+        self.conv58 = ConvLayer(512, 1024)
+        # 1x1 conv producing the raw detection map: 3 anchors x (5 + classes)
+        self.conv59 = ConvLayer(
+            1024, (3 * (5 + num_classes)), 1, 1, use_batch_norm=False,
+            activation="linear"
+        )
+
+        # yolo layer
+        self.yolo1 = DetectionLayer(
+            num_classes=num_classes, anchors=anchors,
+            anchor_masks=(6, 7, 8), device=device, layer=1
+        )
+
+        # self.__route_layer(self.conv57)
+        self.conv60 = ConvLayer(512, 256, 1, 1)
+        self.upsample1 = nn.Upsample(
+            scale_factor=2, mode="nearest"
+            #align_corners=True
+        )
+        # self.__route_layer(self.upsample1, shortcut19)
+        self.conv61 = ConvLayer(768, 256, 1, 1)
+        self.conv62 = ConvLayer(256, 512)
+        self.conv63 = ConvLayer(512, 256, 1, 1)
+        self.conv64 = ConvLayer(256, 512)
+        self.conv65 = ConvLayer(512, 256, 1, 1)
+        self.conv66 = ConvLayer(256, 512)
+        self.conv67 = ConvLayer(
+            512, (3 * (5 + num_classes)), 1, 1, use_batch_norm=False,
+            activation="linear"
+        )
+
+        # yolo layer
+        self.yolo2 = DetectionLayer(
+            num_classes=num_classes, anchors=anchors,
+            anchor_masks=(3, 4, 5), device=device, layer=2
+        )
+
+        # self.__route_layer(self.conv65)
+        self.conv68 = ConvLayer(256, 128, 1, 1)
+        self.upsample2 = nn.Upsample(
+            scale_factor=2, mode="nearest"
+            #align_corners=True
+        )
+        # self.__route_layer(self.upsample2, shortcut11)
+
+        self.conv69 = ConvLayer(384, 128, 1, 1)
+        self.conv70 = ConvLayer(128, 256)
+        self.conv71 = ConvLayer(256, 128, 1, 1)
+        self.conv72 = ConvLayer(128, 256)
+        self.conv73 = ConvLayer(256, 128, 1, 1)
+        self.conv74 = ConvLayer(128, 256)
+        self.conv75 = ConvLayer(
+            256, (3 * (5 + num_classes)), 1, 1, use_batch_norm=False,
+            activation="linear"
+        )
+
+        # yolo layer
+        self.yolo3 = DetectionLayer(
+            num_classes=num_classes, anchors=anchors,
+            anchor_masks=(0, 1, 2), device=device, layer=3
+        )
+
+    def get_loss_layers(self) -> List[nn.Module]:
+        # The three DetectionLayer modules used for loss computation.
+        return [self.yolo1, self.yolo2, self.yolo3]
+
+    def __route_layer(self, y1 : torch.Tensor, y2 : Optional[torch.Tensor]=None):
+        # Darknet "route" layer: concatenate along the channel dimension when
+        # two inputs are given, otherwise pass the single input through.
+        if isinstance(y2, torch.Tensor):
+            return torch.cat([y1, y2], 1)
+        return y1
+
+    def __shortcut_layer(self,
+        y1 : torch.Tensor, y2 : torch.Tensor,
+        activation : str="linear"
+    ) -> torch.Tensor:
+        # Darknet "shortcut" (residual) layer: element-wise sum, optionally
+        # followed by a LeakyReLU activation.
+        actv = noop if activation=="linear" else nn.LeakyReLU(0.1)
+        return actv(y1 + y2)
+
+    def forward(self, x : torch.Tensor) -> torch.Tensor:
+        y = self.conv2(self.conv1(x))
+        # shortcut1
+        y = self.conv5(self.__shortcut_layer(self.conv4(self.conv3(y)), y))
+        y2 = self.conv7(self.conv6(y))
+        # shortcut2
+        y = self.__shortcut_layer(y2, y)
+        y2 = self.conv9(self.conv8(y))
+        # shortcut3
+        y2 = self.conv10(self.__shortcut_layer(y2, y))
+        y = self.conv12(self.conv11(y2))
+        # shortcut4
+        y2 = self.__shortcut_layer(y, y2)
+        y = self.conv14(self.conv13(y2))
+        # shortcut5
+        y2 = self.__shortcut_layer(y, y2)
+        # NOTE(review): unlike the neighbouring stages, the next line applies
+        # an additional shortcut (y2 + y) before conv15, adding the conv14
+        # output twice — confirm against the Darknet cfg that this is intended.
+        y = self.conv16(self.conv15(self.__shortcut_layer(y2, y)))
+        # shortcut6
+        y2 = self.__shortcut_layer(y, y2)
+        y = self.conv18(self.conv17(y2))
+        # shortcut7
+        y2 = self.__shortcut_layer(y, y2)
+        y = self.conv20(self.conv19(y2))
+        # shortcut8
+        y2 = self.__shortcut_layer(y, y2)
+        y = self.conv22(self.conv21(y2))
+        # shortcut9
+        y2 = self.__shortcut_layer(y, y2)
+        y = self.conv24(self.conv23(y2))
+        # shortcut10
+        y2 = self.__shortcut_layer(y, y2)
+        y = self.conv26(self.conv25(y2))
+        # shortcut11
+        r1 = self.__shortcut_layer(y, y2) # route_layer
+        y = self.conv27(r1)
+        y2 = self.conv29(self.conv28(y))
+        # shortcut12
+        y = self.__shortcut_layer(y2, y)
+        y2 = self.conv31(self.conv30(y))
+        # shortcut13
+        y = self.__shortcut_layer(y2, y)
+        y2 = self.conv33(self.conv32(y))
+        # shortcut14
+        y = self.__shortcut_layer(y2, y)
+        y2 = self.conv35(self.conv34(y))
+        # shortcut15
+        y = self.__shortcut_layer(y2, y)
+        y2 = self.conv37(self.conv36(y))
+        # shortcut16
+        y = self.__shortcut_layer(y2, y)
+        y2 = self.conv39(self.conv38(y))
+        # shortcut17
+        y = self.__shortcut_layer(y2, y)
+        y2 = self.conv41(self.conv40(y))
+        # shortcut18
+        y = self.__shortcut_layer(y2, y)
+        y2 = self.conv43(self.conv42(y))
+        # shortcut19
+        r2 = self.__shortcut_layer(y2, y) # route_layer
+        y2 = self.conv44(r2)
+        y = self.conv46(self.conv45(y2))
+        # shortcut20
+        y2 = self.__shortcut_layer(y, y2)
+        y = self.conv48(self.conv47(y2))
+        # shortcut21
+        y2 = self.__shortcut_layer(y, y2)
+        y = self.conv50(self.conv49(y2))
+        # shortcut22
+        y2 = self.__shortcut_layer(y, y2)
+        y = self.conv52(self.conv51(y2))
+        # shortcut23
+        y2 = self.__shortcut_layer(y, y2)
+        y = self.conv54(self.conv53(y2))
+        r3 = self.conv57(self.conv56(self.conv55(y))) # route_layer
+        y = self.conv59(self.conv58(r3))
+
+        # first detection layer
+        out = self.yolo1(y)
+        y = self.conv60(self.__route_layer(r3))
+        # upsample and concatenate with the stride-16 backbone feature map
+        y = self.conv62(self.conv61(self.__route_layer(self.upsample1(y), r2)))
+        r4 = self.conv65(self.conv64(self.conv63(y))) # route_layer
+        y = self.conv67(self.conv66(r4))
+
+        # second detection layer
+        out = torch.cat([out, self.yolo2(y)], dim=1)
+        y = self.conv68(self.__route_layer(r4))
+        # upsample and concatenate with the stride-8 backbone feature map
+        y = self.conv70(self.conv69(self.__route_layer(self.upsample2(y), r1)))
+        y = self.conv75(self.conv74(self.conv73(self.conv72(self.conv71(y)))))
+
+        # third detection layer
+        out = torch.cat([out, self.yolo3(y)], dim=1)
+
+        return out
diff --git a/imageai_tf_deprecated/Classification/CUSTOMCLASSIFICATION.md b/imageai_tf_deprecated/Classification/CUSTOMCLASSIFICATION.md
new file mode 100644
index 00000000..bffcc4ef
--- /dev/null
+++ b/imageai_tf_deprecated/Classification/CUSTOMCLASSIFICATION.md
@@ -0,0 +1,155 @@
+# ImageAI : Custom Image Classification
+A **DeepQuest AI** project https://deepquestai.com
+
+---
+
+ImageAI provides 4 different algorithms and model types to perform custom image prediction using your custom models.
+You will be able to use your model trained with **ImageAI** and the corresponding model_class JSON file to predict custom objects
+that you have trained the model on.
+
+### TABLE OF CONTENTS
+
+- :white_square_button: Custom Model Prediction
+- :white_square_button: Custom Model Prediction with Full Model (NEW)
+- :white_square_button: Custom Prediction with multiple models (NEW)
+- :white_square_button: Convert custom model to Tensorflow's format (NEW)
+- :white_square_button: Convert custom model to DeepStack's format (NEW)
+
+
+### Custom Model Prediction
+
+
+In this example, we will be using the model trained for 20 experiments on **IdenProf**, a dataset of uniformed professionals and achieved 65.17% accuracy on the test dataset.
+(You can use your own trained model and generated JSON file. This class is provided mainly for the purpose of using your own custom models.)
+Download the ResNet model of the model and JSON files in links below:
+
+- [**ResNet50**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/idenprof_resnet_ex-056_acc-0.993062.h5) _(Size = 90.4 mb)_
+- [**IdenProf model_class.json file**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/idenprof.json)
+
+Great!
+Once you have downloaded this model file and the JSON file, start a new python project, and then copy the model file and the JSON file to your project folder where your python files (.py files) will be.
+Download the image below, or take any image on your computer that include any of the following professionals(Chef, Doctor, Engineer, Farmer, Fireman, Judge, Mechanic, Pilot, Police and Waiter) and copy it to your python project's folder.
+Then create a python file and give it a name; an example is **FirstCustomPrediction.py**.
+Then write the code below into the python file:
+
+### FirstCustomPrediction.py
+
+```python
+from imageai.Classification.Custom import CustomImageClassification
+import os
+
+execution_path = os.getcwd()
+
+prediction = CustomImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath(os.path.join(execution_path, "idenprof_resnet_ex-056_acc-0.993062.h5"))
+prediction.setJsonPath(os.path.join(execution_path, "idenprof.json"))
+prediction.loadModel(num_objects=10)
+
+predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "4.jpg"), result_count=5)
+
+for eachPrediction, eachProbability in zip(predictions, probabilities):
+ print(eachPrediction + " : " + eachProbability)
+```
+
+**Sample Result:**
+
+
+```
+mechanic : 76.82620286941528
+chef : 10.106072574853897
+waiter : 4.036874696612358
+police : 2.6663416996598244
+pilot : 2.239348366856575
+```
+
+The code above works as follows:
+```python
+from imageai.Classification.Custom import CustomImageClassification
+import os
+```
+The code above imports the **ImageAI** library for custom image prediction and the python **os** class.
+
+```python
+execution_path = os.getcwd()
+```
+
+The above line obtains the path to the folder that contains your python file (in this example, your FirstCustomPrediction.py).
+
+```python
+prediction = CustomImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath(os.path.join(execution_path, "idenprof_resnet_ex-056_acc-0.993062.h5"))
+prediction.setJsonPath(os.path.join(execution_path, "idenprof.json"))
+prediction.loadModel(num_objects=10)
+```
+
+In the lines above, we created an instance of the `CustomImageClassification()`
+ class in the first line, then we set the model type of the prediction object to ResNet by calling `.setModelTypeAsResNet50()`
+ in the second line, we set the model path of the prediction object to the path of the custom model file (`idenprof_resnet_ex-056_acc-0.993062.h5`) we copied to the python file folder
+ in the third line, we set the path to the model_class.json of the model, loaded the model and passed the number of objects that can be predicted by the model.
+
+```python
+predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "4.jpg"), result_count=5)
+```
+
+In the above line, we defined 2 variables to be equal to the function called to predict an image, which is the `.classifyImage()` function, into which we parsed the path to our image and also state the number of prediction results we want to have (values from 1 to 10 in this case) parsing `result_count=5`. The `.classifyImage()` function will return 2 array objects with the first (**predictions**) being an array of predictions and the second (**percentage_probabilities**) being an array of the corresponding percentage probability for each prediction.
+
+```python
+for eachPrediction, eachProbability in zip(predictions, probabilities):
+ print(eachPrediction + " : " + eachProbability)
+```
+
+The above line obtains each object in the **predictions** array, and also obtains the corresponding percentage probability from the **percentage_probabilities**, and finally prints the result of both to console.
+
+**CustomImageClassification** class also supports the multiple predictions, input types and prediction speeds that are contained
+in the **ImageClassification** class. Follow this [link](README.md) to see all the details.
+
+
+
+### Custom Prediction with multiple models
+
+
+
+In previous versions of **ImageAI**, running more than one custom model at once wasn't supported.
+Now you can run multiple custom models, as many as your computer memory can accommodate.
+See the example code below for running multiple custom prediction models.
+
+```python
+from imageai.Classification.Custom import CustomImageClassification
+import os
+
+execution_path = os.getcwd()
+
+predictor = CustomImageClassification()
+predictor.setModelPath(model_path=os.path.join(execution_path, "idenprof_resnet.h5"))
+predictor.setJsonPath(model_json=os.path.join(execution_path, "idenprof.json"))
+predictor.setModelTypeAsResNet50()
+predictor.loadModel(num_objects=10)
+
+predictor2 = CustomImageClassification()
+predictor2.setModelPath(model_path=os.path.join(execution_path, "idenprof_inception_0.719500.h5"))
+predictor2.setJsonPath(model_json=os.path.join(execution_path, "idenprof.json"))
+predictor2.setModelTypeAsInceptionV3()
+predictor2.loadModel(num_objects=10)
+
+results, probabilities = predictor.classifyImage(image_input=os.path.join(execution_path, "9.jpg"), result_count=5)
+print(results)
+print(probabilities)
+
+
+results2, probabilities2 = predictor2.classifyImage(image_input=os.path.join(execution_path, "9.jpg"),
+ result_count=5)
+print(results2)
+print(probabilities2)
+print("-------------------------------")
+```
+
+### Documentation
+
+We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:
+
+* Documentation - **English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
+* Documentation - **Chinese Version [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)**
+* Documentation - **French Version [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)**
+
diff --git a/imageai_tf_deprecated/Classification/CUSTOMTRAINING.md b/imageai_tf_deprecated/Classification/CUSTOMTRAINING.md
new file mode 100644
index 00000000..1314b0ec
--- /dev/null
+++ b/imageai_tf_deprecated/Classification/CUSTOMTRAINING.md
@@ -0,0 +1,265 @@
+# ImageAI : Custom Prediction Model Training
+
+---
+
+**ImageAI** provides the most simple and powerful approach to training custom image prediction models
+using state-of-the-art SqueezeNet, ResNet50, InceptionV3 and DenseNet
+which you can load into the `imageai.Classification.Custom.CustomImageClassification` class. This allows
+ you to train your own model on any set of images that corresponds to any type of objects/persons.
+The training process generates a JSON file that maps the objects types in your image dataset
+and creates lots of models. You will then pick the model with the highest accuracy and perform custom
+image prediction using the model and the JSON file generated.
+
+### TABLE OF CONTENTS
+- :white_square_button: Custom Model Training Prediction
+- :white_square_button: Saving Full Custom Model
+- :white_square_button: Training on the IdenProf Dataset
+- :white_square_button: Continuous Model Training
+- :white_square_button: Transfer Learning (Training from a pre-trained model)
+
+
+### Custom Model Training
+
+
+Because model training is a compute intensive task, we strongly advise that you perform this experiment using a computer with an NVIDIA GPU and the GPU version of Tensorflow installed. Performing model training on CPU may take hours or days. With an NVIDIA GPU powered computer system, this will take a few hours. You can use Google Colab for this experiment as it has an NVIDIA K80 GPU available.
+
+To train a custom prediction model, you need to prepare the images you want to use to train the model.
+You will prepare the images as follows:
+
+1. Create a dataset folder with the name you will like your dataset to be called (e.g pets)
+2. In the dataset folder, create a folder by the name **train**
+3. In the dataset folder, create a folder by the name **test**
+4. In the train folder, create a folder for each object you want the model to predict and give the folder a name that corresponds to the respective object name (e.g dog, cat, squirrel, snake)
+5. In the test folder, create a folder for each object you want the model to predict and give
+ the folder a name that corresponds to the respective object name (e.g dog, cat, squirrel, snake)
+6. In each folder present in the train folder, put the images of each object in its respective folder. These images are the ones to be used to train the model. To produce a model that can perform well in practical applications, we recommend about 500 or more images per object. 1000 images per object is just great
+7. In each folder present in the test folder, put about 100 to 200 images of each object in its respective folder. These images are the ones to be used to test the model as it trains
+8. Once you have done this, the structure of your image dataset folder should look like below:
+ ```
+ pets//train//dog//dog-train-images
+ pets//train//cat//cat-train-images
+ pets//train//squirrel//squirrel-train-images
+ pets//train//snake//snake-train-images
+ pets//test//dog//dog-test-images
+ pets//test//cat//cat-test-images
+ pets//test//squirrel//squirrel-test-images
+ pets//test//snake//snake-test-images
+ ```
+9. Then your training code goes as follows:
+ ```python
+ from imageai.Classification.Custom import ClassificationModelTrainer
+ model_trainer = ClassificationModelTrainer()
+ model_trainer.setModelTypeAsResNet50()
+ model_trainer.setDataDirectory("pets")
+ model_trainer.trainModel(num_objects=4, num_experiments=100, enhance_data=True, batch_size=32, show_network_summary=True)
+ ```
+
+ Yes! Just 5 lines of code and you can train any of the available 4 state-of-the-art Deep Learning algorithms on your custom dataset.
+Now lets take a look at how the code above works.
+
+```python
+from imageai.Classification.Custom import ClassificationModelTrainer
+model_trainer = ClassificationModelTrainer()
+model_trainer.setModelTypeAsResNet50()
+model_trainer.setDataDirectory("pets")
+```
+
+In the first line, we import the **ImageAI** model training class, then we define the model trainer in the second line,
+ we set the network type in the third line and set the path to the image dataset we want to train the network on.
+
+```python
+model_trainer.trainModel(num_objects=4, num_experiments=100, enhance_data=True, batch_size=32, show_network_summary=True)
+```
+
+In the code above, we start the training process. The parameters stated in the function are as below:
+- **num_objects** : this is to state the number of object types in the image dataset
+- **num_experiments** : this is to state the number of times the network will train over all the training images,
+ which is also called epochs
+- **enhance_data (optional)** : This is used to state if we want the network to produce modified copies of the training
+images for better performance.
+- **batch_size** : This is to state the number of images the network will process at once. The images
+ are processed in batches until they are exhausted per each experiment performed.
+- **show_network_summary** : This is to state if the network should show the structure of the training
+ network in the console.
+
+
+When you start the training, you should see something like this in the console:
+```
+Total params: 23,608,202
+Trainable params: 23,555,082
+Non-trainable params: 53,120
+____________________________________________________________________________________________________
+Using Enhanced Data Generation
+Found 4000 images belonging to 4 classes.
+Found 800 images belonging to 4 classes.
+JSON Mapping for the model classes saved to C:\Users\User\PycharmProjects\ImageAITest\pets\json\model_class.json
+Number of experiments (Epochs) : 100
+```
+
+As the training progresses, you will see results as follows in the console:
+```
+Epoch 1/100
+ 1/25 [>.............................] - ETA: 52s - loss: 2.3026 - acc: 0.2500
+ 2/25 [=>............................] - ETA: 41s - loss: 2.3027 - acc: 0.1250
+ 3/25 [==>...........................] - ETA: 37s - loss: 2.2961 - acc: 0.1667
+ 4/25 [===>..........................] - ETA: 36s - loss: 2.2980 - acc: 0.1250
+ 5/25 [=====>........................] - ETA: 33s - loss: 2.3178 - acc: 0.1000
+ 6/25 [======>.......................] - ETA: 31s - loss: 2.3214 - acc: 0.0833
+ 7/25 [=======>......................] - ETA: 30s - loss: 2.3202 - acc: 0.0714
+ 8/25 [========>.....................] - ETA: 29s - loss: 2.3207 - acc: 0.0625
+ 9/25 [=========>....................] - ETA: 27s - loss: 2.3191 - acc: 0.0556
+10/25 [===========>..................] - ETA: 25s - loss: 2.3167 - acc: 0.0750
+11/25 [============>.................] - ETA: 23s - loss: 2.3162 - acc: 0.0682
+12/25 [=============>................] - ETA: 21s - loss: 2.3143 - acc: 0.0833
+13/25 [==============>...............] - ETA: 20s - loss: 2.3135 - acc: 0.0769
+14/25 [===============>..............] - ETA: 18s - loss: 2.3132 - acc: 0.0714
+15/25 [=================>............] - ETA: 16s - loss: 2.3128 - acc: 0.0667
+16/25 [==================>...........] - ETA: 15s - loss: 2.3121 - acc: 0.0781
+17/25 [===================>..........] - ETA: 13s - loss: 2.3116 - acc: 0.0735
+18/25 [====================>.........] - ETA: 12s - loss: 2.3114 - acc: 0.0694
+19/25 [=====================>........] - ETA: 10s - loss: 2.3112 - acc: 0.0658
+20/25 [=======================>......] - ETA: 8s - loss: 2.3109 - acc: 0.0625
+21/25 [========================>.....] - ETA: 7s - loss: 2.3107 - acc: 0.0595
+22/25 [=========================>....] - ETA: 5s - loss: 2.3104 - acc: 0.0568
+23/25 [==========================>...] - ETA: 3s - loss: 2.3101 - acc: 0.0543
+24/25 [===========================>..] - ETA: 1s - loss: 2.3097 - acc: 0.0625Epoch 00000: saving model to C:\Users\Moses\Documents\Moses\W7\AI\Custom Datasets\IDENPROF\idenprof-small-test\idenprof\models\model_ex-000_acc-0.100000.h5
+
+25/25 [==============================] - 51s - loss: 2.3095 - acc: 0.0600 - val_loss: 2.3026 - val_acc: 0.1000
+```
+
+Let us explain the details shown above:
+1. The line **Epoch 1/100** means the network is training the first experiment of the targeted 100
+2. The line `1/25 [>.............................] - ETA: 52s - loss: 2.3026 - acc: 0.2500` represents the number of batches that has been trained in the present experiment
+3. The line `Epoch 00000: saving model to C:\Users\User\PycharmProjects\ImageAITest\pets\models\model_ex-000_acc-0.100000.h5` refers to the model saved after the present experiment. The **ex_000** represents the experiment at this stage while the **acc_0.100000** and **val_acc: 0.1000** represent the accuracy of the model on the test images after the present experiment (maximum value of accuracy is 1.0). This result helps to know the best performed model you can use for custom image prediction.
+
+ Once you are done training your custom model, you can use the "CustomImageClassification" class to perform image prediction with your model. Simply follow the link below.
+[imageai/Classification/CUSTOMCLASSIFICATION.md](https://github.com/OlafenwaMoses/ImageAI/blob/master/imageai/Classification/CUSTOMCLASSIFICATION.md)
+
+
+### Training on the IdenProf data
+
+A sample from the IdenProf Dataset used to train a Model for predicting professionals.
+
+
+Below we provide a sample code to train on **IdenProf**, a dataset which contains images of 10 uniformed professionals. The code below will download the dataset and initiate the training:
+
+```python
+from io import open
+import requests
+import shutil
+from zipfile import ZipFile
+import os
+from imageai.Classification.Custom import ClassificationModelTrainer
+
+execution_path = os.getcwd()
+
+TRAIN_ZIP_ONE = os.path.join(execution_path, "idenprof-train1.zip")
+TRAIN_ZIP_TWO = os.path.join(execution_path, "idenprof-train2.zip")
+TEST_ZIP = os.path.join(execution_path, "idenprof-test.zip")
+
+DATASET_DIR = os.path.join(execution_path, "idenprof")
+DATASET_TRAIN_DIR = os.path.join(DATASET_DIR, "train")
+DATASET_TEST_DIR = os.path.join(DATASET_DIR, "test")
+
+if(os.path.exists(DATASET_DIR) == False):
+ os.mkdir(DATASET_DIR)
+if(os.path.exists(DATASET_TRAIN_DIR) == False):
+ os.mkdir(DATASET_TRAIN_DIR)
+if(os.path.exists(DATASET_TEST_DIR) == False):
+ os.mkdir(DATASET_TEST_DIR)
+
+if(len(os.listdir(DATASET_TRAIN_DIR)) < 10):
+ if(os.path.exists(TRAIN_ZIP_ONE) == False):
+ print("Downloading idenprof-train1.zip")
+ data = requests.get("https://github.com/OlafenwaMoses/IdenProf/releases/download/v1.0/idenprof-train1.zip", stream = True)
+ with open(TRAIN_ZIP_ONE, "wb") as file:
+ shutil.copyfileobj(data.raw, file)
+ del data
+ if (os.path.exists(TRAIN_ZIP_TWO) == False):
+ print("Downloading idenprof-train2.zip")
+ data = requests.get("https://github.com/OlafenwaMoses/IdenProf/releases/download/v1.0/idenprof-train2.zip", stream=True)
+ with open(TRAIN_ZIP_TWO, "wb") as file:
+ shutil.copyfileobj(data.raw, file)
+ del data
+ print("Extracting idenprof-train1.zip")
+ extract1 = ZipFile(TRAIN_ZIP_ONE)
+ extract1.extractall(DATASET_TRAIN_DIR)
+ extract1.close()
+ print("Extracting idenprof-train2.zip")
+ extract2 = ZipFile(TRAIN_ZIP_TWO)
+ extract2.extractall(DATASET_TRAIN_DIR)
+ extract2.close()
+
+if(len(os.listdir(DATASET_TEST_DIR)) < 10):
+ if (os.path.exists(TEST_ZIP) == False):
+ print("Downloading idenprof-test.zip")
+ data = requests.get("https://github.com/OlafenwaMoses/IdenProf/releases/download/v1.0/idenprof-test.zip", stream=True)
+ with open(TEST_ZIP, "wb") as file:
+ shutil.copyfileobj(data.raw, file)
+ del data
+ print("Extracting idenprof-test.zip")
+ extract = ZipFile(TEST_ZIP)
+ extract.extractall(DATASET_TEST_DIR)
+ extract.close()
+
+
+model_trainer = ClassificationModelTrainer()
+model_trainer.setModelTypeAsResNet50()
+model_trainer.setDataDirectory(DATASET_DIR)
+model_trainer.trainModel(num_objects=10, num_experiments=100, enhance_data=True, batch_size=32, show_network_summary=True)
+```
+
+### Continuous Model Training
+
+
+**ImageAI** now allows you to continue training your custom model on your previously saved model.
+This is useful in cases of incomplete training due to compute time limits/large size of dataset or should you intend to further train your model.
+Kindly note that **continuous training** is for using a previously saved model to train on the same dataset the model was trained on.
+All you need to do is specify the `continue_from_model` parameter to the path of the previously saved model in your `trainModel()` function.
+See an example code below.
+
+```python
+from imageai.Classification.Custom import ClassificationModelTrainer
+import os
+
+trainer = ClassificationModelTrainer()
+trainer.setModelTypeAsDenseNet121()
+trainer.setDataDirectory("idenprof")
+trainer.trainModel(num_objects=10, num_experiments=50, enhance_data=True, batch_size=8, show_network_summary=True, continue_from_model="idenprof_densenet-0.763500.h5")
+```
+
+### Transfer Learning (Training from a pre-trained model)
+
+
+From the feedback we have received over the past months, we discovered most custom models trained with **ImageAI** were based on datasets with a small number of images, as they fall short of the minimum recommendation of 500 images per each class of objects for achieving a viable accuracy.
+
+To ensure you can still train very accurate custom models using a small number of images, **ImageAI** now allows you to train by leveraging **transfer learning**. This means you can take any pre-trained **ResNet50**, **Squeezenet**, **InceptionV3** and **DenseNet121** model trained on larger datasets and use it to kickstart your custom model training.
+All you need to do is specify the `transfer_from_model` parameter to the path of the pre-trained model, `initial_num_objects` parameter which corresponds to the number of objects in the previous dataset the pre-trained model was trained on, all in your `trainModel()` function. See an example code below, showing how to perform transfer learning from a ResNet50 model trained on the ImageNet dataset.
+
+```python
+from imageai.Classification.Custom import ClassificationModelTrainer
+import os
+
+trainer = ClassificationModelTrainer()
+trainer.setModelTypeAsResNet50()
+trainer.setDataDirectory("idenprof")
+trainer.trainModel(num_objects=10, num_experiments=50, enhance_data=True, batch_size=32, show_network_summary=True,transfer_from_model="resnet50_imagenet_tf.2.0.h5", initial_num_objects=1000)
+```
+
+
+### Contact Developer
+- **Moses Olafenwa**
+ * _Email:_ guymodscientist@gmail.com
+ * _Website:_ [https://moses.aicommons.science](https://moses.aicommons.science)
+ * _Twitter:_ [@OlafenwaMoses](https://twitter.com/OlafenwaMoses)
+ * _Medium:_ [@guymodscientist](https://medium.com/@guymodscientist)
+ * _Facebook:_ [moses.olafenwa](https://facebook.com/moses.olafenwa)
+
+
+### Documentation
+
+We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:
+
+* Documentation - **English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
+* Documentation - **Chinese Version [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)**
+* Documentation - **French Version [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)**
diff --git a/imageai_tf_deprecated/Classification/Custom/__init__.py b/imageai_tf_deprecated/Classification/Custom/__init__.py
new file mode 100644
index 00000000..4ba8222e
--- /dev/null
+++ b/imageai_tf_deprecated/Classification/Custom/__init__.py
@@ -0,0 +1,666 @@
+import tensorflow as tf
+from PIL import Image
+import time
+import numpy as np
+import os
+import warnings
+from matplotlib.cbook import deprecated
+import json
+
+class ClassificationModelTrainer:
+ """
+ This is the Classification Model training class, that allows you to define a deep learning network
+ from the 4 available networks types supported by ImageAI which are MobileNetv2, ResNet50,
+ InceptionV3 and DenseNet121.
+ """
+
+ def __init__(self):
+ self.__modelType = ""
+ self.__use_pretrained_model = False
+ self.__data_dir = ""
+ self.__train_dir = ""
+ self.__test_dir = ""
+ self.__logs_dir = ""
+ self.__num_epochs = 10
+ self.__trained_model_dir = ""
+ self.__model_class_dir = ""
+ self.__initial_learning_rate = 1e-3
+ self.__model_collection = []
+
+
+ def setModelTypeAsSqueezeNet(self):
+ raise ValueError("ImageAI no longer support SqueezeNet. You can use MobileNetV2 instead by downloading the MobileNetV2 model and call the function 'setModelTypeAsMobileNetV2'")
+
+ def setModelTypeAsMobileNetV2(self):
+ """
+ 'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model
+ for the training instance object .
+ :return:
+ """
+ self.__modelType = "mobilenetv2"
+
+ @deprecated(since="2.1.6", message="'.setModelTypeAsResNet()' has been deprecated! Please use 'setModelTypeAsResNet50()' instead.")
+ def setModelTypeAsResNet(self):
+ return self.setModelTypeAsResNet50()
+
+ def setModelTypeAsResNet50(self):
+ """
+ 'setModelTypeAsResNet()' is used to set the model type to the ResNet model
+ for the training instance object .
+ :return:
+ """
+ self.__modelType = "resnet50"
+
+
+ @deprecated(since="2.1.6", message="'.setModelTypeAsDenseNet()' has been deprecated! Please use 'setModelTypeAsDenseNet121()' instead.")
+ def setModelTypeAsDenseNet(self):
+ return self.setModelTypeAsDenseNet121()
+
+ def setModelTypeAsDenseNet121(self):
+ """
+ 'setModelTypeAsDenseNet()' is used to set the model type to the DenseNet model
+ for the training instance object .
+ :return:
+ """
+ self.__modelType = "densenet121"
+
+ def setModelTypeAsInceptionV3(self):
+ """
+ 'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model
+ for the training instance object .
+ :return:
+ """
+ self.__modelType = "inceptionv3"
+
+ def setDataDirectory(self, data_directory="", train_subdirectory="train", test_subdirectory="test",
+ models_subdirectory="models", json_subdirectory="json"):
+ """
+ 'setDataDirectory()'
+
+ - data_directory , is required to set the path to which the data/dataset to be used for
+ training is kept. The directory can have any name, but it must have 'train' and 'test'
+ sub-directory. In the 'train' and 'test' sub-directories, there must be sub-directories
+ with each having it's name corresponds to the name/label of the object whose images are
+ to be kept. The structure of the 'test' and 'train' folder must be as follows:
+
+ >> train >> class1 >> class1_train_images
+ >> class2 >> class2_train_images
+ >> class3 >> class3_train_images
+ >> class4 >> class4_train_images
+ >> class5 >> class5_train_images
+
+ >> test >> class1 >> class1_test_images
+ >> class2 >> class2_test_images
+ >> class3 >> class3_test_images
+ >> class4 >> class4_test_images
+ >> class5 >> class5_test_images
+
+ - train_subdirectory (optional), subdirectory within 'data_directory' where the training set is. Defaults to 'train'.
+ - test_subdirectory (optional), subdirectory within 'data_directory' where the testing set is. Defaults to 'test'.
+ - models_subdirectory (optional), subdirectory within 'data_directory' where the output models will be saved. Defaults to 'models'.
+ - json_subdirectory (optional), subdirectory within 'data_directory' where the model classes json file will be saved. Defaults to 'json'.
+
+ :param data_directory:
+ :param train_subdirectory:
+ :param test_subdirectory:
+ :param models_subdirectory:
+ :param json_subdirectory:
+ :return:
+ """
+
+ self.__data_dir = data_directory
+
+ self.__train_dir = os.path.join(self.__data_dir, train_subdirectory)
+ self.__test_dir = os.path.join(self.__data_dir, test_subdirectory)
+ self.__trained_model_dir = os.path.join(self.__data_dir, models_subdirectory)
+ self.__model_class_dir = os.path.join(self.__data_dir, json_subdirectory)
+ self.__logs_dir = os.path.join(self.__data_dir, "logs")
+
+ def lr_schedule(self, epoch):
+
+ # Learning Rate Schedule
+
+
+ lr = self.__initial_learning_rate
+ total_epochs = self.__num_epochs
+
+ check_1 = int(total_epochs * 0.9)
+ check_2 = int(total_epochs * 0.8)
+ check_3 = int(total_epochs * 0.6)
+ check_4 = int(total_epochs * 0.4)
+
+ if epoch > check_1:
+ lr *= 1e-4
+ elif epoch > check_2:
+ lr *= 1e-3
+ elif epoch > check_3:
+ lr *= 1e-2
+ elif epoch > check_4:
+ lr *= 1e-1
+
+
+ return lr
+
+
+
+
+ def trainModel(self, num_objects, num_experiments=200, enhance_data=False, batch_size = 32, initial_learning_rate=1e-3, show_network_summary=False, training_image_size = 224, continue_from_model=None, transfer_from_model=None, transfer_with_full_training=True, initial_num_objects = None, save_full_model = False):
+
+ """
+ 'trainModel()' function starts the model actual training. It accepts the following values:
+ - num_objects , which is the number of classes present in the dataset that is to be used for training
+ - num_experiments , also known as epochs, it is the number of times the network will train on all the training dataset
+ - enhance_data (optional) , this is used to modify the dataset and create more instance of the training set to enhance the training result
+ - batch_size (optional) , due to memory constraints, the network trains on a batch at once, until all the training set is exhausted. The value is set to 32 by default, but can be increased or decreased depending on the meormory of the compute used for training. The batch_size is conventionally set to 16, 32, 64, 128.
+ - initial_learning_rate(optional) , this value is used to adjust the weights generated in the network. You rae advised to keep this value as it is if you don't have deep understanding of this concept.
+ - show_network_summary(optional) , this value is used to show the structure of the network should you desire to see it. It is set to False by default
+ - training_image_size(optional) , this value is used to define the image size on which the model will be trained. The value is 224 by default and is kept at a minimum of 100.
+ - continue_from_model (optional) , this is used to set the path to a model file trained on the same dataset. It is primarily for continuos training from a previously saved model.
+ - transfer_from_model (optional) , this is used to set the path to a model file trained on another dataset. It is primarily used to perform tramsfer learning.
+ - transfer_with_full_training (optional) , this is used to set the pre-trained model to be re-trained across all the layers or only at the top layers.
+ - initial_num_objects (required if 'transfer_from_model' is set ), this is used to set the number of objects the model used for transfer learning is trained on. If 'transfer_from_model' is set, this must be set as well.
+ - save_full_model ( optional ), this is used to save the trained models with their network types. Any model saved by this specification can be loaded without specifying the network type.
+
+
+ :param num_objects:
+ :param num_experiments:
+ :param enhance_data:
+ :param batch_size:
+ :param initial_learning_rate:
+ :param show_network_summary:
+ :param training_image_size:
+ :param continue_from_model:
+ :param transfer_from_model:
+ :param initial_num_objects:
+ :param save_full_model:
+ :return:
+ """
+ self.__num_epochs = num_experiments
+ self.__initial_learning_rate = initial_learning_rate
+ lr_scheduler = tf.keras.callbacks.LearningRateScheduler(self.lr_schedule)
+
+
+ if(training_image_size < 100):
+ warnings.warn("The specified training_image_size {} is less than 100. Hence the training_image_size will default to 100.".format(training_image_size))
+ training_image_size = 100
+
+
+
+ if (self.__modelType == "mobilenetv2"):
+ if (continue_from_model != None):
+ model = tf.keras.applications.MobileNetV2(input_shape=(training_image_size, training_image_size, 3), weights=continue_from_model, classes=num_objects,
+ include_top=True)
+ if (show_network_summary == True):
+ print("Training using weights from a previouly model")
+ elif (transfer_from_model != None):
+ base_model = tf.keras.applications.MobileNetV2(input_shape=(training_image_size, training_image_size, 3), weights= transfer_from_model,
+ include_top=False, pooling="avg")
+
+ network = base_model.output
+ network = tf.keras.layers.Dense(num_objects, activation='softmax',
+ use_bias=True)(network)
+
+ model = tf.keras.model.Models(inputs=base_model.input, outputs=network)
+
+ if (show_network_summary == True):
+ print("Training using weights from a pre-trained ImageNet model")
+ else:
+ base_model = tf.keras.applications.MobileNetV2(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
+ include_top=False, pooling="avg")
+
+ network = base_model.output
+ network = tf.keras.layers.Dense(num_objects, activation='softmax',
+ use_bias=True)(network)
+
+ model = tf.keras.models.Model(inputs=base_model.input, outputs=network)
+
+ elif (self.__modelType == "resnet50"):
+ if (continue_from_model != None):
+ model = tf.keras.applications.ResNet50(input_shape=(training_image_size, training_image_size, 3), weights=continue_from_model, classes=num_objects,
+ include_top=True)
+ if (show_network_summary == True):
+ print("Training using weights from a previouly model")
+ elif (transfer_from_model != None):
+ base_model = tf.keras.applications.ResNet50(input_shape=(training_image_size, training_image_size, 3), weights= transfer_from_model,
+ include_top=False, pooling="avg")
+
+ network = base_model.output
+ network = tf.keras.layers.Dense(num_objects, activation='softmax',
+ use_bias=True)(network)
+
+ model = tf.keras.model.Models(inputs=base_model.input, outputs=network)
+
+ if (show_network_summary == True):
+ print("Training using weights from a pre-trained ImageNet model")
+ else:
+ base_model = tf.keras.applications.ResNet50(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
+ include_top=False, pooling="avg")
+
+ network = base_model.output
+ network = tf.keras.layers.Dense(num_objects, activation='softmax',
+ use_bias=True)(network)
+
+ model = tf.keras.models.Model(inputs=base_model.input, outputs=network)
+
+ elif (self.__modelType == "inceptionv3"):
+
+ if (continue_from_model != None):
+ model = tf.keras.applications.InceptionV3(input_shape=(training_image_size, training_image_size, 3), weights=continue_from_model, classes=num_objects,
+ include_top=True)
+ if (show_network_summary == True):
+ print("Training using weights from a previouly model")
+ elif (transfer_from_model != None):
+ base_model = tf.keras.applications.InceptionV3(input_shape=(training_image_size, training_image_size, 3), weights= transfer_from_model,
+ include_top=False, pooling="avg")
+
+ network = base_model.output
+ network = tf.keras.layers.Dense(num_objects, activation='softmax',
+ use_bias=True)(network)
+
+ model = tf.keras.model.Models(inputs=base_model.input, outputs=network)
+
+ if (show_network_summary == True):
+ print("Training using weights from a pre-trained ImageNet model")
+ else:
+ base_model = tf.keras.applications.InceptionV3(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
+ include_top=False, pooling="avg")
+
+ network = base_model.output
+ network = tf.keras.layers.Dense(num_objects, activation='softmax',
+ use_bias=True)(network)
+
+ model = tf.keras.models.Model(inputs=base_model.input, outputs=network)
+
+ base_model = tf.keras.applications.InceptionV3(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
+ include_top=False, pooling="avg")
+
+ elif (self.__modelType == "densenet121"):
+ if (continue_from_model != None):
+ model = tf.keras.applications.DenseNet121(input_shape=(training_image_size, training_image_size, 3), weights=continue_from_model, classes=num_objects,
+ include_top=True)
+ if (show_network_summary == True):
+ print("Training using weights from a previouly model")
+ elif (transfer_from_model != None):
+ base_model = tf.keras.applications.DenseNet121(input_shape=(training_image_size, training_image_size, 3), weights= transfer_from_model,
+ include_top=False, pooling="avg")
+
+ network = base_model.output
+ network = tf.keras.layers.Dense(num_objects, activation='softmax',
+ use_bias=True)(network)
+
+ model = tf.keras.model.Models(inputs=base_model.input, outputs=network)
+
+ if (show_network_summary == True):
+ print("Training using weights from a pre-trained ImageNet model")
+ else:
+ base_model = tf.keras.applications.DenseNet121(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
+ include_top=False, pooling="avg")
+
+ network = base_model.output
+ network = tf.keras.layers.Dense(num_objects, activation='softmax',
+ use_bias=True)(network)
+
+ model = tf.keras.models.Model(inputs=base_model.input, outputs=network)
+
+ base_model = tf.keras.applications.DenseNet121(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
+ include_top=False, pooling="avg")
+
+
+ optimizer = tf.keras.optimizers.Adam(lr=self.__initial_learning_rate, decay=1e-4)
+ model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
+ if (show_network_summary == True):
+ model.summary()
+
+ model_name = 'model_ex-{epoch:03d}_acc-{accuracy:03f}.h5'
+
+ log_name = '{}_lr-{}_{}'.format(self.__modelType, initial_learning_rate, time.strftime("%Y-%m-%d-%H-%M-%S"))
+
+ if not os.path.isdir(self.__trained_model_dir):
+ os.makedirs(self.__trained_model_dir)
+
+ if not os.path.isdir(self.__model_class_dir):
+ os.makedirs(self.__model_class_dir)
+
+ if not os.path.isdir(self.__logs_dir):
+ os.makedirs(self.__logs_dir)
+
+ model_path = os.path.join(self.__trained_model_dir, model_name)
+
+
+ logs_path = os.path.join(self.__logs_dir, log_name)
+ if not os.path.isdir(logs_path):
+ os.makedirs(logs_path)
+
+ save_weights_condition = True
+
+ if(save_full_model == True ):
+ save_weights_condition = False
+
+
+ checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=model_path,
+ monitor='accuracy',
+ verbose=1,
+ save_weights_only=save_weights_condition,
+ save_best_only=True,
+ period=1)
+
+
+ tensorboard = tf.keras.callbacks.TensorBoard(log_dir=logs_path,
+ histogram_freq=0,
+ write_graph=False,
+ write_images=False)
+
+
+ if (enhance_data == True):
+ print("Using Enhanced Data Generation")
+
+ height_shift = 0
+ width_shift = 0
+ if (enhance_data == True):
+ height_shift = 0.1
+ width_shift = 0.1
+
+ train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
+ rescale=1. / 255,
+ horizontal_flip=enhance_data, height_shift_range=height_shift, width_shift_range=width_shift)
+
+ test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
+ rescale=1. / 255)
+
+ train_generator = train_datagen.flow_from_directory(self.__train_dir, target_size=(training_image_size, training_image_size),
+ batch_size=batch_size,
+ class_mode="categorical")
+ test_generator = test_datagen.flow_from_directory(self.__test_dir, target_size=(training_image_size, training_image_size),
+ batch_size=batch_size,
+ class_mode="categorical")
+
+ class_indices = train_generator.class_indices
+ class_json = {}
+ for eachClass in class_indices:
+ class_json[str(class_indices[eachClass])] = eachClass
+
+ with open(os.path.join(self.__model_class_dir, "model_class.json"), "w+") as json_file:
+ json.dump(class_json, json_file, indent=4, separators=(",", " : "),
+ ensure_ascii=True)
+ json_file.close()
+ print("JSON Mapping for the model classes saved to ", os.path.join(self.__model_class_dir, "model_class.json"))
+
+ num_train = len(train_generator.filenames)
+ num_test = len(test_generator.filenames)
+ print("Number of experiments (Epochs) : ", self.__num_epochs)
+
+
+ model.fit_generator(train_generator, steps_per_epoch=int(num_train / batch_size), epochs=self.__num_epochs,
+ validation_data=test_generator,
+ validation_steps=int(num_test / batch_size), callbacks=[checkpoint, lr_scheduler])
+
+
+
+
+
+class CustomImageClassification:
+ """
+ This is the image classification class for custom models trained with the 'ClassificationModelTrainer' class. It provides support for 4 different models which are:
+ ResNet50, MobileNetV2, DenseNet121 and Inception V3. After instantiating this class, you can set its properties and
+ make image classification using its pre-defined functions.
+
+ The following functions are required to be called before a classification can be made
+ * setModelPath() , path to your custom model
+ * setJsonPath() , path to your custom model's corresponding JSON file
+ * At least one of the following and it must correspond to the model set in the setModelPath()
+ [setModelTypeAsMobileNetV2(), setModelTypeAsResNet50(), setModelTypeAsDenseNet121(), setModelTypeAsInceptionV3()]
+ * loadModel() [This must be called once only before making a classification]
+
+ Once the above functions have been called, you can call the classifyImage() function of the classification instance
+ object at anytime to predict an image.
+ """
+ def __init__(self):
+ self.__modelType = ""
+ self.modelPath = ""
+ self.jsonPath = ""
+ self.numObjects = 10
+ self.__model_classes = dict()
+ self.__modelLoaded = False
+ self.__model_collection = []
+ self.__input_image_size = 224
+
+ def setModelPath(self, model_path):
+ """
+ 'setModelPath()' function is required and is used to set the file path to the model adopted from the list of the
+ available 4 model types. The model path must correspond to the model type set for the classification instance object.
+
+ :param model_path:
+ :return:
+ """
+ self.modelPath = model_path
+
+ def setJsonPath(self, model_json):
+ """
+ 'setJsonPath()'
+
+ :param model_path:
+ :return:
+ """
+ self.jsonPath = model_json
+
+ def setModelTypeAsMobileNetV2(self):
+ """
+ 'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model
+ for the classification instance object .
+ :return:
+ """
+ self.__modelType = "mobilenetv2"
+
+ def setModelTypeAsResNet50(self):
+ """
+ 'setModelTypeAsResNet50()' is used to set the model type to the ResNet50 model
+ for the classification instance object .
+ :return:
+ """
+ self.__modelType = "resnet50"
+
+ def setModelTypeAsDenseNet121(self):
+ """
+ 'setModelTypeAsDenseNet121()' is used to set the model type to the DenseNet121 model
+ for the classification instance object .
+ :return:
+ """
+ self.__modelType = "densenet121"
+
+ def setModelTypeAsInceptionV3(self):
+ """
+ 'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model
+ for the classification instance object .
+ :return:
+ """
+ self.__modelType = "inceptionv3"
+
+ def loadModel(self, classification_speed="normal", num_objects=10):
+ """
+ 'loadModel()' function is used to load the model structure into the program from the file path defined
+ in the setModelPath() function. This function receives an optional value which is "classification_speed".
+ The value is used to reduce the time it takes to classify an image, down to about 50% of the normal time,
+ with just slight changes or drop in classification accuracy, depending on the nature of the image.
+ * classification_speed (optional); Acceptable values are "normal", "fast", "faster" and "fastest"
+
+ :param classification_speed :
+ :return:
+ """
+
+ self.__model_classes = json.load(open(self.jsonPath))
+
+ if(classification_speed=="normal"):
+ self.__input_image_size = 224
+ elif(classification_speed=="fast"):
+ self.__input_image_size = 160
+ elif(classification_speed=="faster"):
+ self.__input_image_size = 120
+ elif (classification_speed == "fastest"):
+ self.__input_image_size = 100
+
+ if (self.__modelLoaded == False):
+
+ image_input = tf.keras.layers.Input(shape=(self.__input_image_size, self.__input_image_size, 3))
+
+ if(self.__modelType == "" ):
+ raise ValueError("You must set a valid model type before loading the model.")
+
+ elif(self.__modelType == "mobilenetv2"):
+ model = tf.keras.applications.MobileNetV2(input_shape=(self.__input_image_size, self.__input_image_size, 3), weights=self.modelPath, classes = num_objects )
+ self.__model_collection.append(model)
+ self.__modelLoaded = True
+ try:
+ None
+ except:
+ raise ValueError("An error occured. Ensure your model file is a MobileNetV2 Model and is located in the path {}".format(self.modelPath))
+
+ elif(self.__modelType == "resnet50"):
+ try:
+ model = tf.keras.applications.ResNet50(input_shape=(self.__input_image_size, self.__input_image_size, 3), weights=None, classes = num_objects )
+ model.load_weights(self.modelPath)
+ self.__model_collection.append(model)
+ self.__modelLoaded = True
+ except:
+ raise ValueError("An error occured. Ensure your model file is a ResNet50 Model and is located in the path {}".format(self.modelPath))
+
+ elif (self.__modelType == "densenet121"):
+ try:
+ model = tf.keras.applications.DenseNet121(input_shape=(self.__input_image_size, self.__input_image_size, 3), weights=self.modelPath, classes = num_objects)
+ self.__model_collection.append(model)
+ self.__modelLoaded = True
+ except:
+ raise ValueError("An error occured. Ensure your model file is a DenseNet121 Model and is located in the path {}".format(self.modelPath))
+
+ elif (self.__modelType == "inceptionv3"):
+ try:
+ model = tf.keras.applications.InceptionV3(input_shape=(self.__input_image_size, self.__input_image_size, 3), weights=self.modelPath, classes = num_objects )
+ self.__model_collection.append(model)
+ self.__modelLoaded = True
+ except:
+ raise ValueError("An error occured. Ensure your model file is in {}".format(self.modelPath))
+ def loadFullModel(self, classification_speed="normal", num_objects=10):
+ """
+ 'loadFullModel()' function is used to load the model structure into the program from the file path defined
+ in the setModelPath() function. As opposed to the 'loadModel()' function, you don't need to specify the model type. This means you can load any Keras model trained with or without ImageAI and perform image prediction.
+ - classification_speed (optional), Acceptable values are "normal", "fast", "faster" and "fastest"
+ - num_objects (required), the number of objects the model is trained to recognize
+
+ :param classification_speed:
+ :param num_objects:
+ :return:
+ """
+
+ self.numObjects = num_objects
+ self.__model_classes = json.load(open(self.jsonPath))
+
+ if (classification_speed == "normal"):
+ self.__input_image_size = 224
+ elif (classification_speed == "fast"):
+ self.__input_image_size = 160
+ elif (classification_speed == "faster"):
+ self.__input_image_size = 120
+ elif (classification_speed == "fastest"):
+ self.__input_image_size = 100
+
+ if (self.__modelLoaded == False):
+
+ model = tf.keras.models.load_model(filepath=self.modelPath)
+ self.__model_collection.append(model)
+ self.__modelLoaded = True
+ self.__modelType = "full"
+
+ def getModels(self):
+ """
+ 'getModels()' provides access to the internal model collection. Helpful if models are used down the line with tools like lime.
+ :return:
+ """
+ return self.__model_collection
+
+
+ def classifyImage(self, image_input, result_count=5, input_type="file"):
+ """
+ 'classifyImage()' function is used to classify a given image by receiving the following arguments:
+ * input_type (optional) , the type of input to be parsed. Acceptable values are "file", "array" and "stream"
+ * image_input , file path/numpy array/image file stream of the image.
+ * result_count (optional) , the number of classifications to be sent which must be whole numbers between
+ 1 and 1000. The default is 5.
+
+ This function returns 2 arrays namely 'classification_results' and 'classification_probabilities'. The 'classification_results'
+ contains possible objects classes arranged in descending of their percentage probabilities. The 'classification_probabilities'
+ contains the percentage probability of each object class. The position of each object class in the 'classification_results'
+ array corresponds with the positions of the percentage probability in the 'classification_probabilities' array.
+
+
+ :param input_type:
+ :param image_input:
+ :param result_count:
+ :return classification_results, classification_probabilities:
+ """
+ classification_results = []
+ classification_probabilities = []
+ if (self.__modelLoaded == False):
+ raise ValueError("You must call the loadModel() function before making classification.")
+
+ else:
+ if (input_type == "file"):
+ try:
+ image_to_predict = tf.keras.preprocessing.image.load_img(image_input, target_size=(self.__input_image_size, self.__input_image_size))
+ image_to_predict = tf.keras.preprocessing.image.img_to_array(image_to_predict, data_format="channels_last")
+ image_to_predict = np.expand_dims(image_to_predict, axis=0)
+ except:
+ raise ValueError("You have set a path to an invalid image file.")
+ elif (input_type == "array"):
+ try:
+ image_input = Image.fromarray(np.uint8(image_input))
+ image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
+ image_input = np.expand_dims(image_input, axis=0)
+ image_to_predict = image_input.copy()
+ image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
+ except:
+ raise ValueError("You have parsed in a wrong numpy array for the image")
+ elif (input_type == "stream"):
+ try:
+ image_input = Image.open(image_input)
+ image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
+ image_input = np.expand_dims(image_input, axis=0)
+ image_to_predict = image_input.copy()
+ image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
+
+ except:
+ raise ValueError("You have parsed in a wrong stream for the image")
+
+ if (self.__modelType == "mobilenetv2"):
+ image_to_predict = tf.keras.applications.mobilenet_v2.preprocess_input(image_to_predict)
+ elif (self.__modelType == "full"):
+ image_to_predict = tf.keras.applications.mobilenet_v2.preprocess_input(image_to_predict)
+ elif (self.__modelType == "inceptionv3"):
+ image_to_predict = tf.keras.applications.inception_v3.preprocess_input(image_to_predict)
+ elif (self.__modelType == "densenet121"):
+ image_to_predict = tf.keras.applications.densenet.preprocess_input(image_to_predict)
+ try:
+ model = self.__model_collection[0]
+ prediction = model.predict(image_to_predict, steps=1)
+
+ predictiondata = []
+ for pred in prediction:
+ top_indices = pred.argsort()[-result_count:][::-1]
+ for i in top_indices:
+ each_result = []
+ each_result.append(self.__model_classes[str(i)])
+ each_result.append(pred[i])
+ predictiondata.append(each_result)
+
+ for result in predictiondata:
+ classification_results.append(str(result[0]))
+ classification_probabilities.append(result[1] * 100)
+
+ except:
+ raise ValueError("Error. Ensure your input image is valid")
+
+ return classification_results, classification_probabilities
+
+
+ @deprecated(since="2.1.6", message="'.predictImage()' has been deprecated! Please use 'classifyImage()' instead.")
+ def predictImage(self, image_input, result_count=5, input_type="file"):
+
+ return self.classifyImage(image_input, result_count, input_type)
\ No newline at end of file
diff --git a/imageai_tf_deprecated/Classification/README.md b/imageai_tf_deprecated/Classification/README.md
new file mode 100644
index 00000000..bb4def1f
--- /dev/null
+++ b/imageai_tf_deprecated/Classification/README.md
@@ -0,0 +1,258 @@
+# ImageAI : Image Prediction
+A **DeepQuest AI** project [https://deepquestai.com](https://deepquestai.com)
+
+---
+
+### TABLE OF CONTENTS
+- :white_square_button: First Prediction
+- :white_square_button: Prediction Speed
+- :white_square_button: Image Input Types
+- :white_square_button: Prediction in MultiThreading
+- :white_square_button: Documentation
+
+ImageAI provides 4 different algorithms and model types to perform image prediction.
+To perform image prediction on any picture, take the following simple steps. The 4 algorithms provided for
+ image prediction include **MobileNetV2**, **ResNet50**, **InceptionV3** and **DenseNet121**. Each of these
+ algorithms have individual model files which you must use depending on the choice of your algorithm. To download the
+ model file for your choice of algorithm, click on any of the links below:
+
+- **[MobileNetV2](https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/mobilenet_v2.h5)** _(Size = 4.82 mb, fastest prediction time and moderate accuracy)_
+- **[ResNet50](https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_imagenet_tf.2.0.h5)** by Microsoft Research _(Size = 98 mb, fast prediction time and high accuracy)_
+- **[InceptionV3](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/inception_v3_weights_tf_dim_ordering_tf_kernels.h5)** by Google Brain team _(Size = 91.6 mb, slow prediction time and higher accuracy)_
+- **[DenseNet121](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/DenseNet-BC-121-32.h5)** by Facebook AI Research _(Size = 31.6 mb, slower prediction time and highest accuracy)_
+
+ Great! Once you have downloaded this model file, start a new python project, and then copy the model file to your project
+ folder where your python files (.py files) will be. Download the image below, or take any image on your computer
+ and copy it to your python project's folder. Then create a python file and give it a name; an example is `FirstPrediction.py`.
+ Then write the code below into the python file:
+
+### FirstPrediction.py
+
+
+```python
+from imageai.Classification import ImageClassification
+import os
+
+execution_path = os.getcwd()
+
+prediction = ImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath(os.path.join(execution_path, "resnet50_imagenet_tf.2.0.h5"))
+prediction.loadModel()
+
+predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "1.jpg"), result_count=5 )
+for eachPrediction, eachProbability in zip(predictions, probabilities):
+ print(eachPrediction , " : " , eachProbability)
+```
+
+Sample Result:
+
+
+```
+convertible : 52.459555864334106
+sports_car : 37.61284649372101
+pickup : 3.1751200556755066
+car_wheel : 1.817505806684494
+minivan : 1.7487050965428352
+```
+
+The code above works as follows:
+```python
+from imageai.Classification import ImageClassification
+import os
+```
+The code above imports the `ImageAI` library and the python `os` class.
+```python
+execution_path = os.getcwd()
+```
+The above line obtains the path to the folder that contains your python file (in this example, your FirstPrediction.py).
+
+```python
+prediction = ImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath(os.path.join(execution_path, "resnet50_imagenet_tf.2.0.h5"))
+```
+In the lines above, we created an instance of the `ImageClassification()` class in the first line, then we set the model type of the prediction object to ResNet50 by calling the `.setModelTypeAsResNet50()` function in the second line and then we set the model path of the prediction object to the path of the model file (`resnet50_imagenet_tf.2.0.h5`) we copied to the python file folder in the third line.
+
+```python
+predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "1.jpg"), result_count=5 )
+```
+
+In the above line, we defined 2 variables to be equal to the function called to predict an image, which is the `.classifyImage()` function, into which we parsed the path to our image and also state the number of prediction results we want to have (values from 1 to 1000) parsing `result_count=5`. The `.classifyImage()` function will return 2 array objects with the first (**predictions**) being an array of predictions and the second (**percentage_probabilities**) being an array of the corresponding percentage probability for each prediction.
+
+```python
+for eachPrediction, eachProbability in zip(predictions, probabilities):
+ print(eachPrediction, " : " , eachProbability)
+```
+The above line obtains each object in the **predictions** array, and also obtains the corresponding percentage probability from the **percentage_probabilities**, and finally prints the result of both to console.
+
+
+### Prediction Speed
+
+
+**ImageAI** now provides prediction speeds for all image prediction tasks. The prediction speeds allow you to reduce the time of prediction at a rate between 20% - 60%, and yet having just slight changes but accurate prediction results. The available prediction speeds are **"normal"**(default), **"fast"**, **"faster"** and **"fastest"**.
+All you need to do is to state the speed mode you desire when loading the model as seen below.
+
+```python
+prediction.loadModel(prediction_speed="fast")
+```
+
+To observe the differences in the prediction speeds, look below for each speed applied to multiple prediction with time taken to predict and predictions given. The results below are obtained from predictions performed on a Windows 8 laptop with Intel Celeron N2820 CPU, with processor speed of 2.13GHz
+
+**Prediction Speed = "normal" , Prediction Time = 5.9 seconds**
+```
+convertible : 52.459555864334106
+sports_car : 37.61284649372101
+pickup : 3.1751200556755066
+car_wheel : 1.817505806684494
+minivan : 1.7487050965428352
+-----------------------
+toilet_tissue : 13.99008333683014
+jeep : 6.842949986457825
+car_wheel : 6.71963095664978
+seat_belt : 6.704962253570557
+minivan : 5.861184373497963
+-----------------------
+bustard : 52.03368067741394
+vulture : 20.936034619808197
+crane : 10.620515048503876
+kite : 10.20539253950119
+white_stork : 1.6472270712256432
+-----------------------
+```
+
+**Prediction Speed = "fast" , Prediction Time = 3.4 seconds**
+```
+sports_car : 55.5136501789093
+pickup : 19.860029220581055
+convertible : 17.88402795791626
+tow_truck : 2.357563190162182
+car_wheel : 1.8646160140633583
+-----------------------
+drum : 12.241223454475403
+toilet_tissue : 10.96322312951088
+car_wheel : 10.776633024215698
+dial_telephone : 9.840480983257294
+toilet_seat : 8.989936858415604
+-----------------------
+vulture : 52.81011462211609
+bustard : 45.628002285957336
+kite : 0.8065823465585709
+goose : 0.3629807382822037
+crane : 0.21266008261591196
+-----------------------
+```
+
+**Prediction Speed = "faster" , Prediction Time = 2.7 seconds**
+```
+sports_car : 79.90474104881287
+tow_truck : 9.751049429178238
+convertible : 7.056044787168503
+racer : 1.8735893070697784
+car_wheel : 0.7379394955933094
+-----------------------
+oil_filter : 73.52778315544128
+jeep : 11.926891654729843
+reflex_camera : 7.9965077340602875
+Polaroid_camera : 0.9798810817301273
+barbell : 0.8661789819598198
+-----------------------
+vulture : 93.00530552864075
+bustard : 6.636220961809158
+kite : 0.15161558985710144
+bald_eagle : 0.10513027664273977
+crane : 0.05982434959150851
+-----------------------
+```
+
+**Prediction Speed = "fastest" , Prediction Time = 2.2 seconds**
+```
+tow_truck : 62.5033438205719
+sports_car : 31.26143217086792
+racer : 2.2139860317111015
+fire_engine : 1.7813067883253098
+ambulance : 0.8790366351604462
+-----------------------
+reflex_camera : 94.00787949562073
+racer : 2.345871739089489
+jeep : 1.6016140580177307
+oil_filter : 1.4121259562671185
+lens_cap : 0.1283118617720902
+-----------------------
+kite : 98.5377550125122
+vulture : 0.7469987496733665
+bustard : 0.36855682265013456
+bald_eagle : 0.2437378279864788
+great_grey_owl : 0.0699841941241175
+-----------------------
+```
+
+**PLEASE NOTE:** When adjusting speed modes, it is best to use models that have higher accuracies like the DenseNet or InceptionV3 models, or use it in case scenarios where the images predicted are iconic.
+
+
+### Image Input Types
+
+
+Previous versions of **ImageAI** supported only file inputs, accepting file paths to an image for image prediction.
+Now, **ImageAI** supports 3 input types which are **file path to image file**(default), **numpy array of image** and **image file stream**.
+This means you can now perform image prediction in production applications such as on a web server and system
+ that returns file in any of the above stated formats.
+
+To perform image prediction with numpy array or file stream input, you just need to state the input type
+in the `.classifyImage()` function. See example below.
+
+```python
+predictions, probabilities = prediction.classifyImage(image_array, result_count=5 , input_type="array" ) # For numpy array input type
+predictions, probabilities = prediction.classifyImage(image_stream, result_count=5 , input_type="stream" ) # For file stream input type
+```
+
+### Prediction in MultiThreading
+
+
+When developing programs that run heavy task on the default thread like User Interfaces (UI),
+ you should consider running your predictions in a new thread. When running image prediction using ImageAI in
+ a new thread, you must take note of the following:
+- You can create your prediction object, set its model type, set model path and json path
+outside the new thread.
+- The `.loadModel()` function must be called in the new thread, and image prediction (`classifyImage()`) must take place in the new thread.
+
+Take a look of a sample code below on image prediction using multithreading:
+```python
+from imageai.Classification import ImageClassification
+import os
+import threading
+
+execution_path = os.getcwd()
+
+prediction = ImageClassification()
+prediction.setModelTypeAsResNet50()
+prediction.setModelPath( os.path.join(execution_path, "resnet50_imagenet_tf.2.0.h5"))
+
+picturesfolder = os.environ["USERPROFILE"] + "\\Pictures\\"
+allfiles = os.listdir(picturesfolder)
+
+class PredictionThread(threading.Thread):
+ def __init__(self):
+ threading.Thread.__init__(self)
+ def run(self):
+ prediction.loadModel()
+ for eachPicture in allfiles:
+ if eachPicture.endswith(".png") or eachPicture.endswith(".jpg"):
+ predictions, percentage_probabilities = prediction.classifyImage(picturesfolder + eachPicture, result_count=1)
+ for eachPrediction, eachProbability in zip(predictions, percentage_probabilities):
+ print(eachPrediction , " : " , eachProbability)
+
+predictionThread = PredictionThread ()
+predictionThread.start()
+
+```
+
+
+### Documentation
+
+We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find the links below:
+
+* Documentation - **English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
+* Documentation - **Chinese Version [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)**
+* Documentation - **French Version [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)**
+
diff --git a/imageai_tf_deprecated/Classification/__init__.py b/imageai_tf_deprecated/Classification/__init__.py
new file mode 100644
index 00000000..f02928e5
--- /dev/null
+++ b/imageai_tf_deprecated/Classification/__init__.py
@@ -0,0 +1,233 @@
+import tensorflow as tf
+from PIL import Image
+import numpy as np
+from matplotlib.cbook import deprecated
+
+
+class ImageClassification:
+ """
+ This is the image classification class in the ImageAI library. It provides support for 4 different models which are:
+ ResNet, MobileNetV2, DenseNet and Inception V3. After instantiating this class, you can set its properties and
+ make image classification using its pre-defined functions.
+
+ The following functions are required to be called before a classification can be made
+ * setModelPath()
+ * At least one of the following and it must correspond to the model set in the setModelPath()
+ [setModelTypeAsMobileNetV2(), setModelTypeAsResNet50(), setModelTypeAsDenseNet121(), setModelTypeAsInceptionV3()]
+ * loadModel() [This must be called once only before making a classification]
+
+ Once the above functions have been called, you can call the classifyImage() function of the classification instance
+ object at anytime to classify an image.
+ """
+ def __init__(self):
+ self.__modelType = ""
+ self.modelPath = ""
+ self.__modelLoaded = False
+ self.__model_collection = []
+ self.__input_image_size = 224
+
+ def setModelPath(self, model_path):
+ """
+ 'setModelPath()' function is required and is used to set the file path to the model adopted from the list of the
+ available 4 model types. The model path must correspond to the model type set for the classification instance object.
+
+ :param model_path:
+ :return:
+ """
+ self.modelPath = model_path
+
+ def setModelTypeAsSqueezeNet(self):
+ raise ValueError("ImageAI no longer support SqueezeNet. You can use MobileNetV2 instead by downloading the MobileNetV2 model and call the function 'setModelTypeAsMobileNetV2'")
+
+ def setModelTypeAsMobileNetV2(self):
+ """
+ 'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model
+ for the classification instance object .
+ :return:
+ """
+ self.__modelType = "mobilenetv2"
+
+ @deprecated(since="2.1.6", message="'.setModelTypeAsResNet()' has been deprecated! Please use 'setModelTypeAsResNet50()' instead.")
+ def setModelTypeAsResNet(self):
+ return self.setModelTypeAsResNet50()
+
+ def setModelTypeAsResNet50(self):
+ """
+ 'setModelTypeAsResNet50()' is used to set the model type to the ResNet50 model
+ for the classification instance object .
+ :return:
+ """
+ self.__modelType = "resnet50"
+
+ @deprecated(since="2.1.6", message="'.setModelTypeAsDenseNet()' has been deprecated! Please use 'setModelTypeAsDenseNet121()' instead.")
+ def setModelTypeAsDenseNet(self):
+ return self.setModelTypeAsDenseNet121()
+
+ def setModelTypeAsDenseNet121(self):
+ """
+ 'setModelTypeAsDenseNet121()' is used to set the model type to the DenseNet121 model
+ for the classification instance object .
+ :return:
+ """
+ self.__modelType = "densenet121"
+
+ def setModelTypeAsInceptionV3(self):
+ """
+ 'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model
+ for the classification instance object .
+ :return:
+ """
+ self.__modelType = "inceptionv3"
+
+ def loadModel(self, classification_speed="normal"):
+ """
+ 'loadModel()' function is used to load the model structure into the program from the file path defined
+ in the setModelPath() function. This function receives an optional value which is "classification_speed".
+ The value is used to reduce the time it takes to classify an image, down to about 50% of the normal time,
+ with just slight changes or drop in classification accuracy, depending on the nature of the image.
+ * classification_speed (optional); Acceptable values are "normal", "fast", "faster" and "fastest"
+
+ :param classification_speed :
+ :return:
+ """
+
+ if(classification_speed=="normal"):
+ self.__input_image_size = 224
+ elif(classification_speed=="fast"):
+ self.__input_image_size = 160
+ elif(classification_speed=="faster"):
+ self.__input_image_size = 120
+ elif (classification_speed == "fastest"):
+ self.__input_image_size = 100
+
+ if (self.__modelLoaded == False):
+
+ if(self.__modelType == "" ):
+ raise ValueError("You must set a valid model type before loading the model.")
+
+ elif(self.__modelType == "mobilenetv2"):
+ model = tf.keras.applications.MobileNetV2(input_shape=(self.__input_image_size, self.__input_image_size, 3), weights=None, classes = 1000 )
+ model.load_weights(self.modelPath)
+ self.__model_collection.append(model)
+ self.__modelLoaded = True
+ try:
+ None
+ except:
+ raise ValueError("An error occured. Ensure your model file is a MobileNetV2 Model and is located in the path {}".format(self.modelPath))
+
+ elif(self.__modelType == "resnet50"):
+ try:
+ model = tf.keras.applications.ResNet50(input_shape=(self.__input_image_size, self.__input_image_size, 3), weights=None, classes = 1000 )
+ model.load_weights(self.modelPath)
+ self.__model_collection.append(model)
+ self.__modelLoaded = True
+ except Exception as e:
+ raise ValueError("An error occured. Ensure your model file is a ResNet50 Model and is located in the path {}".format(self.modelPath))
+
+ elif (self.__modelType == "densenet121"):
+ try:
+ model = tf.keras.applications.DenseNet121(input_shape=(self.__input_image_size, self.__input_image_size, 3), weights=None, classes = 1000 )
+ model.load_weights(self.modelPath)
+ self.__model_collection.append(model)
+ self.__modelLoaded = True
+ except:
+ raise ValueError("An error occured. Ensure your model file is a DenseNet121 Model and is located in the path {}".format(self.modelPath))
+
+ elif (self.__modelType == "inceptionv3"):
+ try:
+ model = tf.keras.applications.InceptionV3(input_shape=(self.__input_image_size, self.__input_image_size, 3), weights=None, classes = 1000 )
+ model.load_weights(self.modelPath)
+ self.__model_collection.append(model)
+ self.__modelLoaded = True
+ except:
+ raise ValueError("An error occured. Ensure your model file is in {}".format(self.modelPath))
+
+
+ def classifyImage(self, image_input, result_count=5, input_type="file"):
+ """
+ 'classifyImage()' function is used to classify a given image by receiving the following arguments:
+ * input_type (optional) , the type of input to be parsed. Acceptable values are "file", "array" and "stream"
+ * image_input , file path/numpy array/image file stream of the image.
+ * result_count (optional) , the number of classifications to be sent which must be whole numbers between
+ 1 and 1000. The default is 5.
+
+ This function returns 2 arrays namely 'classification_results' and 'classification_probabilities'. The 'classification_results'
+ contains possible objects classes arranged in descending of their percentage probabilities. The 'classification_probabilities'
+ contains the percentage probability of each object class. The position of each object class in the 'classification_results'
+ array corresponds with the positions of the percentage probability in the 'classification_probabilities' array.
+
+
+ :param input_type:
+ :param image_input:
+ :param result_count:
+ :return classification_results, classification_probabilities:
+ """
+ classification_results = []
+ classification_probabilities = []
+ if (self.__modelLoaded == False):
+ raise ValueError("You must call the loadModel() function before making classification.")
+
+ else:
+ if (input_type == "file"):
+ try:
+ image_to_predict = tf.keras.preprocessing.image.load_img(image_input, target_size=(self.__input_image_size, self.__input_image_size))
+ image_to_predict = tf.keras.preprocessing.image.img_to_array(image_to_predict, data_format="channels_last")
+ image_to_predict = np.expand_dims(image_to_predict, axis=0)
+ except:
+ raise ValueError("You have set a path to an invalid image file.")
+ elif (input_type == "array"):
+ try:
+ image_input = Image.fromarray(np.uint8(image_input))
+ image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
+ image_input = np.expand_dims(image_input, axis=0)
+ image_to_predict = image_input.copy()
+ image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
+ except:
+ raise ValueError("You have parsed in a wrong numpy array for the image")
+ elif (input_type == "stream"):
+ try:
+ image_input = Image.open(image_input)
+ image_input = image_input.resize((self.__input_image_size, self.__input_image_size))
+ image_input = np.expand_dims(image_input, axis=0)
+ image_to_predict = image_input.copy()
+ image_to_predict = np.asarray(image_to_predict, dtype=np.float64)
+
+ except:
+ raise ValueError("You have parsed in a wrong stream for the image")
+
+ if (self.__modelType == "mobilenetv2"):
+ image_to_predict = tf.keras.applications.mobilenet_v2.preprocess_input(image_to_predict)
+ elif (self.__modelType == "densenet121"):
+ image_to_predict = tf.keras.applications.densenet.preprocess_input(image_to_predict)
+ elif (self.__modelType == "inceptionv3"):
+ image_to_predict = tf.keras.applications.inception_v3.preprocess_input(image_to_predict)
+
+ try:
+ model = self.__model_collection[0]
+ prediction = model.predict(image_to_predict, steps=1)
+
+ if (self.__modelType == "mobilenetv2"):
+ predictiondata = tf.keras.applications.mobilenet_v2.decode_predictions(prediction, top=int(result_count))
+ elif (self.__modelType == "resnet50"):
+ predictiondata = tf.keras.applications.resnet50.decode_predictions(prediction, top=int(result_count))
+ elif (self.__modelType == "inceptionv3"):
+ predictiondata = tf.keras.applications.inception_v3.decode_predictions(prediction, top=int(result_count))
+ elif (self.__modelType == "densenet121"):
+ predictiondata = tf.keras.applications.densenet.decode_predictions(prediction, top=int(result_count))
+
+
+
+ for results in predictiondata:
+ for result in results:
+ classification_results.append(str(result[1]))
+ classification_probabilities.append(result[2] * 100)
+ except:
+ raise ValueError("An error occured! Try again.")
+
+ return classification_results, classification_probabilities
+
+
+ @deprecated(since="2.1.6", message="'.predictImage()' has been deprecated! Please use 'classifyImage()' instead.")
+ def predictImage(self, image_input, result_count=5, input_type="file"):
+
+ return self.classifyImage(image_input, result_count, input_type)
\ No newline at end of file
diff --git a/imageai_tf_deprecated/Detection/Custom/CUSTOMDETECTION.md b/imageai_tf_deprecated/Detection/Custom/CUSTOMDETECTION.md
new file mode 100644
index 00000000..2d189ba8
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/CUSTOMDETECTION.md
@@ -0,0 +1,215 @@
+# ImageAI : Custom Object Detection
+
+A **DeepQuest AI** project [https://deepquestai.com](https://deepquestai.com)
+
+---
+
+
+### TABLE OF CONTENTS
+
+- :white_square_button: Custom Object Detection
+- :white_square_button: Object Detection, Extraction and Fine-tune
+- :white_square_button: Hiding/Showing Object Name and Probability
+- :white_square_button: Image Input & Output Types
+- :white_square_button: Documentation
+
+
+ImageAI provides very convenient and powerful methods to perform object detection on images and extract each object from the image using your own **custom YOLOv3 model** and the corresponding **detection_config.json** generated during the training. To test the custom object detection, you can download a sample custom model we have trained to detect the Hololens headset and its **detection_config.json** file via the links below:
+
+* [**hololens-ex-60--loss-2.76.h5**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5) _(Size = 236 mb)_
+* [**detection_config.json**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json)
+
+
+ Once you download the custom object detection model file, you should copy the model file to your project folder where your **.py** files will be.
+ Then create a python file and give it a name; an example is FirstCustomDetection.py. Then write the code below into the python file:
+
+### FirstCustomDetection.py
+
+
+```python
+from imageai.Detection.Custom import CustomObjectDetection
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("hololens-ex-60--loss-2.76.h5")
+detector.setJsonPath("detection_config.json")
+detector.loadModel()
+detections = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
+for detection in detections:
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+
+```
+
+Sample Result - Input:
+
+
+
+ Output:
+
+
+
+```
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+
+```
+
+
+Let us make a breakdown of the object detection code that we used above.
+
+```python
+from imageai.Detection.Custom import CustomObjectDetection
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+```
+ In the 3 lines above, we import the **ImageAI custom object detection** class in the first line, create the class instance on the second line and set the model type to YOLOv3.
+
+```python
+detector.setModelPath("hololens-ex-60--loss-2.76.h5")
+detector.setJsonPath("detection_config.json")
+detector.loadModel()
+```
+
+ In the 3 lines above, we specified the file path to our downloaded model file in the first line , specified the path to our **detection_config.json** file in the second line and loaded the model on the third line.
+
+```python
+detections = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
+for detection in detections:
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+
+```
+
+In the 3 lines above, we ran the `detectObjectsFromImage()` function and passed in the path to our test image, and the path to the new
+ image which the function will save. Then the function returns an array of dictionaries with each dictionary corresponding
+ to the number of objects detected in the image. Each dictionary has the properties `name` (name of the object),
+`percentage_probability` (percentage probability of the detection) and `box_points` (the x1,y1,x2 and y2 coordinates of the bounding box of the object).
+
+
+
+
+### Object Detection, Extraction and Fine-tune
+
+
+In the examples we used above, we ran the object detection on an image and it
+returned the detected objects in an array as well as save a new image with rectangular markers drawn on each object. In our next examples, we will be able to extract each object from the input image and save it independently.
+
+
+
+In the example code below which is very identical to the previous object detection code, we will save each object detected as a separate image.
+
+```python
+from imageai.Detection.Custom import CustomObjectDetection
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("hololens-ex-60--loss-2.76.h5")
+detector.setJsonPath("detection_config.json")
+detector.loadModel()
+detections, extracted_objects_array = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg", extract_detected_objects=True)
+
+for detection, object_path in zip(detections, extracted_objects_array):
+ print(object_path)
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+ print("---------------")
+```
+
+
+Sample Result: Output Images
+
+
+
+
+
+
+
+
+
+
+
+
+Let us review the part of the code that perform the object detection and extract the images:
+
+```python
+detections, extracted_objects_array = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg", extract_detected_objects=True)
+
+for detection, object_path in zip(detections, extracted_objects_array):
+ print(object_path)
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+ print("---------------")
+```
+
+In the lines above, we called the `detectObjectsFromImage()` function, passed in the input image path, the output image path, and an
+extra parameter `extract_detected_objects=True`. This parameter states that the function should extract each object detected from the image
+and save it as a separate image. The parameter is false by default. Once set to `true`, the function will create a directory
+ which is the `output image path + "-objects"`. Then it saves all the extracted images into this new directory with
+ each image's name being the `detected object name + "-" + a number` which corresponds to the order at which the objects
+ were detected.
+
+This new parameter we set to extract and save detected objects as an image will make the function to return 2 values. The
+ first is the array of dictionaries with each dictionary corresponding to a detected object. The second is an array of the paths
+ to the saved images of each object detected and extracted, and they are arranged in order at which the objects are in the
+ first array.
+
+
+
+### And one important feature you need to know!
+
+You will recall that the percentage probability
+ for each detected object is sent back by the `detectObjectsFromImage()` function. The function has a parameter
+ `minimum_percentage_probability`, whose default value is `30` (value ranges between 0 - 100), and it is set to 30 in this example. That means the function will only return a detected
+ object if its percentage probability is **30 or above**. The value was kept at this number to ensure the integrity of the
+ detection results. You can fine-tune the object
+ detection by setting `minimum_percentage_probability` to a smaller value to detect more objects or to a higher value to detect fewer objects.
+
+
+
+
+### Hiding/Showing Object Name and Probability
+
+
+**ImageAI** provides options to hide the name of objects detected and/or the percentage probability from being shown on the saved/returned detected image. Using the `detectObjectsFromImage()` and `detectCustomObjectsFromImage()` functions, the parameters `'display_object_name'` and `'display_percentage_probability'` can be set to True or False individually. Take a look at the code below:
+```python
+detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "holo2.jpg"), output_image_path=os.path.join(execution_path , "holo2_nodetails.jpg"), minimum_percentage_probability=30, display_percentage_probability=False, display_object_name=False)
+```
+
+In the above code, we specified that both the object name and percentage probability should not be shown. As you can see in the result below, both the names of the objects and their individual percentage probability is not shown in the detected image.
+
+**Result**
+
+
+
+
+### Image Input & Output Types
+
+
+**ImageAI** custom object detection supports 2 types of inputs, which are **file path to image file** (default) and **numpy array of an image**,
+as well as 2 types of output, which are image **file** (default) and numpy **array**.
+This means you can now perform object detection in production applications such as on a web server and system
+ that returns file in any of the above stated formats.
+ To perform object detection with numpy array input, you just need to state the input type
+in the `.detectObjectsFromImage()` function. See example below.
+
+```python
+detections = detector.detectObjectsFromImage(input_type="array", input_image=image_array , output_image_path=os.path.join(execution_path , "holo2-detected.jpg")) # For numpy array input type
+```
+To perform object detection with numpy array output you just need to state the output type
+in the `.detectObjectsFromImage()` function. See example below.
+
+```python
+detected_image_array, detections = detector.detectObjectsFromImage(output_type="array", input_image="holo2.jpg" ) # For numpy array output type
+```
+
+
+
+### Documentation
+
+
+We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:
+
+* Documentation - **English Version** [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
+* Documentation - **Chinese Version** [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)
+* Documentation - **French Version** [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)
diff --git a/imageai_tf_deprecated/Detection/Custom/CUSTOMDETECTIONTRAINING.md b/imageai_tf_deprecated/Detection/Custom/CUSTOMDETECTIONTRAINING.md
new file mode 100644
index 00000000..dc023ddc
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/CUSTOMDETECTIONTRAINING.md
@@ -0,0 +1,353 @@
+# ImageAI : Custom Detection Model Training
+
+---
+
+**ImageAI** provides the most simple and powerful approach to training custom object detection models
+using the YOLOv3 architecture, which
+you can load into the `imageai.Detection.Custom.CustomObjectDetection` class. This allows
+ you to train your own model on any set of images that corresponds to any type of objects of interest.
+The training process generates a JSON file that maps the objects names in your image dataset and the detection anchors, as well as creates lots of models. In choosing the best model for your custom object detection task, an `evaluateModel()` function has been provided to compute the **mAP** of your saved models by allowing you to state your desired **IoU** and **Non-maximum Suppression** values. Then you can perform custom
+object detection using the model and the JSON file generated.
+
+### TABLE OF CONTENTS
+- :white_square_button: Preparing your custom dataset
+- :white_square_button: Training on your custom Dataset
+- :white_square_button: Evaluating your saved detection models' mAP
+
+
+### Preparing your custom dataset
+
+
+To train a custom detection model, you need to prepare the images you want to use to train the model.
+You will prepare the images as follows:
+
+1. Decide the type of object(s) you want to detect and collect about **200 (minimum recommendation)** or more pictures of each of the object(s)
+2. Once you have collected the images, you need to annotate the object(s) in the images. **ImageAI** uses the **Pascal VOC format** for image annotation. You can generate this annotation for your images using the easy to use [**LabelImg**](https://github.com/tzutalin/labelImg) image annotation tool, available for Windows, Linux and MacOS systems. Open the link below to install the annotation tool. See: [https://github.com/tzutalin/labelImg](https://github.com/tzutalin/labelImg)
+3. When you are done annotating your images, **annotation XML** files will be generated for each image in your dataset. The **annotation XML** file describes each or **all** of the objects in the image. For example, if your image names are **image(1).jpg**, **image(2).jpg**, **image(3).jpg** till **image(z).jpg**; the corresponding annotation for each of the images will be **image(1).xml**, **image(2).xml**, **image(3).xml** till **image(z).xml**.
+4. Once you have the annotations for all your images, create a folder for your dataset (E.g headsets) and in this parent folder, create child folders **train** and **validation**
+5. In the train folder, create **images** and **annotations**
+ sub-folders. Put about 70-80% of your dataset of each object's images in the **images** folder and put the corresponding annotations for these images in the **annotations** folder.
+6. In the validation folder, create **images** and **annotations** sub-folders. Put the rest of your dataset images in the **images** folder and put the corresponding annotations for these images in the **annotations** folder.
+7. Once you have done this, the structure of your image dataset folder should look like below:
+ ```
+ >> train >> images >> img_1.jpg (shows Object_1)
+ >> images >> img_2.jpg (shows Object_2)
+ >> images >> img_3.jpg (shows Object_1, Object_3 and Object_n)
+ >> annotations >> img_1.xml (describes Object_1)
+ >> annotations >> img_2.xml (describes Object_2)
+ >> annotations >> img_3.xml (describes Object_1, Object_3 and Object_n)
+
+ >> validation >> images >> img_151.jpg (shows Object_1, Object_3 and Object_n)
+ >> images >> img_152.jpg (shows Object_2)
+ >> images >> img_153.jpg (shows Object_1)
+ >> annotations >> img_151.xml (describes Object_1, Object_3 and Object_n)
+ >> annotations >> img_152.xml (describes Object_2)
+ >> annotations >> img_153.xml (describes Object_1)
+ ```
+8. You can train your custom detection model completely from scratch or use transfer learning (recommended for better accuracy) from a pre-trained YOLOv3 model. Also, we have provided a sample annotated Hololens and Headsets (Hololens and Oculus) dataset for you to train with. Download the pre-trained YOLOv3 model and the sample datasets in the link below.
+
+[https://github.com/OlafenwaMoses/ImageAI/releases/tag/essential-v4](https://github.com/OlafenwaMoses/ImageAI/releases/tag/essential-v4)
+
+
+### Training on your custom dataset
+
+
+Before you start training your custom detection model, kindly take note of the following:
+
+- The default **batch_size** is 4. If you are training with **Google Colab**, this will be fine. However, I will advice you use a more powerful GPU than the K80 offered by Colab as the higher your **batch_size (8, 16)**, the better the accuracy of your detection model.
+- If you experience '_TfDeviceCaptureOp' object has no attribute '_set_device_from_string' error in Google Colab, it is due to a bug in **Tensorflow**. You can solve this by installing **Tensorflow GPU 1.13.1**.
+ ```bash
+ pip3 install tensorflow-gpu==1.13.1
+ ```
+
+Then your training code goes as follows:
+```python
+from imageai.Detection.Custom import DetectionModelTrainer
+
+trainer = DetectionModelTrainer()
+trainer.setModelTypeAsYOLOv3()
+trainer.setDataDirectory(data_directory="hololens")
+trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="pretrained-yolov3.h5")
+# In the above,when training for detecting multiple objects,
+#set object_names_array=["object1", "object2", "object3",..."objectz"]
+trainer.trainModel()
+```
+
+ Yes! Just 6 lines of code and you can train object detection models on your custom dataset.
+Now lets take a look at how the code above works.
+
+```python
+from imageai.Detection.Custom import DetectionModelTrainer
+
+trainer = DetectionModelTrainer()
+trainer.setModelTypeAsYOLOv3()
+trainer.setDataDirectory(data_directory="hololens")
+```
+
+In the first line, we import the **ImageAI** detection model training class, then we define the model trainer in the second line,
+ we set the network type in the third line and set the path to the image dataset we want to train the network on.
+
+```python
+trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="pretrained-yolov3.h5")
+```
+
+
+In the line above, we configured our detection model trainer. The parameters we stated in the function are as below:
+
+- **num_objects** : this is an array containing the names of the objects in our dataset
+- **batch_size** : this is to state the batch size for the training
+- **num_experiments** : this is to state the number of times the network will train over all the training images,
+ which is also called epochs
+- **train_from_pretrained_model(optional)** : this is to train using transfer learning from a pre-trained **YOLOv3** model
+
+```python
+trainer.trainModel()
+```
+
+
+When you start the training, you should see something like this in the console:
+```
+Using TensorFlow backend.
+Generating anchor boxes for training images and annotation...
+Average IOU for 9 anchors: 0.78
+Anchor Boxes generated.
+Detection configuration saved in hololens/json/detection_config.json
+Training on: ['hololens']
+Training with Batch Size: 4
+Number of Experiments: 200
+
+Epoch 1/200
+480/480 [==============================] - 395s 823ms/step - loss: 36.9000 - yolo_layer_1_loss: 3.2970 - yolo_layer_2_loss: 9.4923 - yolo_layer_3_loss: 24.1107 - val_loss: 15.6321 - val_yolo_layer_1_loss: 2.0275 - val_yolo_layer_2_loss: 6.4191 - val_yolo_layer_3_loss: 7.1856
+Epoch 2/200
+480/480 [==============================] - 293s 610ms/step - loss: 11.9330 - yolo_layer_1_loss: 1.3968 - yolo_layer_2_loss: 4.2894 - yolo_layer_3_loss: 6.2468 - val_loss: 7.9868 - val_yolo_layer_1_loss: 1.7054 - val_yolo_layer_2_loss: 2.9156 - val_yolo_layer_3_loss: 3.3657
+Epoch 3/200
+480/480 [==============================] - 293s 610ms/step - loss: 7.1228 - yolo_layer_1_loss: 1.0583 - yolo_layer_2_loss: 2.2863 - yolo_layer_3_loss: 3.7782 - val_loss: 6.4964 - val_yolo_layer_1_loss: 1.1391 - val_yolo_layer_2_loss: 2.2058 - val_yolo_layer_3_loss: 3.1514
+Epoch 4/200
+480/480 [==============================] - 297s 618ms/step - loss: 5.5802 - yolo_layer_1_loss: 0.9742 - yolo_layer_2_loss: 1.8916 - yolo_layer_3_loss: 2.7144 - val_loss: 6.4275 - val_yolo_layer_1_loss: 1.6153 - val_yolo_layer_2_loss: 2.1203 - val_yolo_layer_3_loss: 2.6919
+Epoch 5/200
+480/480 [==============================] - 295s 615ms/step - loss: 4.8717 - yolo_layer_1_loss: 0.7568 - yolo_layer_2_loss: 1.6641 - yolo_layer_3_loss: 2.4508 - val_loss: 6.3723 - val_yolo_layer_1_loss: 1.6434 - val_yolo_layer_2_loss: 2.1188 - val_yolo_layer_3_loss: 2.6101
+Epoch 6/200
+480/480 [==============================] - 300s 624ms/step - loss: 4.7989 - yolo_layer_1_loss: 0.8708 - yolo_layer_2_loss: 1.6683 - yolo_layer_3_loss: 2.2598 - val_loss: 5.8672 - val_yolo_layer_1_loss: 1.2349 - val_yolo_layer_2_loss: 2.0504 - val_yolo_layer_3_loss: 2.5820
+Epoch 7/200
+```
+
+Let us explain the details shown above:
+```
+Using TensorFlow backend.
+Generating anchor boxes for training images and annotation...
+Average IOU for 9 anchors: 0.78
+Anchor Boxes generated.
+Detection configuration saved in hololens/json/detection_config.json
+Training on: ['hololens']
+Training with Batch Size: 4
+Number of Experiments: 200
+```
+
+The above details signifies the following:
+- **ImageAI** autogenerates the best match detection **anchor boxes** for your image dataset.
+
+- The anchor boxes and the object names mapping are saved in
+**json/detection_config.json** path of in the image dataset folder. Please note that for every new training you start, a new **detection_config.json** file is generated and is only compatible with the model saved during that training.
+
+```
+Epoch 1/200
+480/480 [==============================] - 395s 823ms/step - loss: 36.9000 - yolo_layer_1_loss: 3.2970 - yolo_layer_2_loss: 9.4923 - yolo_layer_3_loss: 24.1107 - val_loss: 15.6321 - val_yolo_layer_1_loss: 2.0275 - val_yolo_layer_2_loss: 6.4191 - val_yolo_layer_3_loss: 7.1856
+Epoch 2/200
+480/480 [==============================] - 293s 610ms/step - loss: 11.9330 - yolo_layer_1_loss: 1.3968 - yolo_layer_2_loss: 4.2894 - yolo_layer_3_loss: 6.2468 - val_loss: 7.9868 - val_yolo_layer_1_loss: 1.7054 - val_yolo_layer_2_loss: 2.9156 - val_yolo_layer_3_loss: 3.3657
+Epoch 3/200
+480/480 [==============================] - 293s 610ms/step - loss: 7.1228 - yolo_layer_1_loss: 1.0583 - yolo_layer_2_loss: 2.2863 - yolo_layer_3_loss: 3.7782 - val_loss: 6.4964 - val_yolo_layer_1_loss: 1.1391 - val_yolo_layer_2_loss: 2.2058 - val_yolo_layer_3_loss: 3.1514
+Epoch 4/200
+480/480 [==============================] - 297s 618ms/step - loss: 5.5802 - yolo_layer_1_loss: 0.9742 - yolo_layer_2_loss: 1.8916 - yolo_layer_3_loss: 2.7144 - val_loss: 6.4275 - val_yolo_layer_1_loss: 1.6153 - val_yolo_layer_2_loss: 2.1203 - val_yolo_layer_3_loss: 2.6919
+Epoch 5/200
+480/480 [==============================] - 295s 615ms/step - loss: 4.8717 - yolo_layer_1_loss: 0.7568 - yolo_layer_2_loss: 1.6641 - yolo_layer_3_loss: 2.4508 - val_loss: 6.3723 - val_yolo_layer_1_loss: 1.6434 - val_yolo_layer_2_loss: 2.1188 - val_yolo_layer_3_loss: 2.6101
+Epoch 6/200
+480/480 [==============================] - 300s 624ms/step - loss: 4.7989 - yolo_layer_1_loss: 0.8708 - yolo_layer_2_loss: 1.6683 - yolo_layer_3_loss: 2.2598 - val_loss: 5.8672 - val_yolo_layer_1_loss: 1.2349 - val_yolo_layer_2_loss: 2.0504 - val_yolo_layer_3_loss: 2.5820
+Epoch 7/200
+```
+
+- The above signifies the progress of the training.
+- For each experiment (Epoch), the total training loss (e.g. loss: 4.7989) and the validation loss (e.g. val_loss: 5.8672) are reported.
+- For each drop in the loss after an experiment, a model is saved in the **hololens/models** folder. The lower the loss, the better the model.
+- **Tensorboard** report file for the training will be saved in the **hololens/logs** folder.
+
+Once you are done training, you can visit the link below for performing object detection with your **custom detection model** and **detection_config.json** file.
+
+[Detection/Custom/CUSTOMDETECTION.md](./CUSTOMDETECTION.md)
+
+
+
+### Evaluating your saved detection models' mAP
+
+
+After training on your custom dataset, you can evaluate the mAP of your saved models by specifying your desired IoU and Non-maximum suppression values. See details as below:
+
+- **Single Model Evaluation:** To evaluate a single model, simply use the example code below with the path to your dataset directory, the model file and the **detection_config.json** file saved during the training. In the example, we used an **object_threshold** of 0.3 ( percentage_score >= 30% ), **IoU** of 0.5 and **Non-maximum suppression** value of 0.5.
+ ```python
+ from imageai.Detection.Custom import DetectionModelTrainer
+
+ trainer = DetectionModelTrainer()
+ trainer.setModelTypeAsYOLOv3()
+ trainer.setDataDirectory(data_directory="hololens")
+ metrics = trainer.evaluateModel(model_path="detection_model-ex-60--loss-2.76.h5", json_path="detection_config.json", iou_threshold=0.5, object_threshold=0.3, nms_threshold=0.5)
+ ```
+ Consider that `trainer.evaluateModel` method will show the metrics on standard output as shown below,
+ but also returns a list of dicts containing all the information that is displayed.
+
+ Sample Result:
+ ```
+ Model File: hololens_detection_model-ex-09--loss-4.01.h5
+ Using IoU : 0.5
+ Using Object Threshold : 0.3
+ Using Non-Maximum Suppression : 0.5
+ hololens: 0.9613
+ mAP: 0.9613
+ ===============================
+ ```
+ Let's see what those metrics look like:
+ ```
+ [{
+ 'average_precision': {'hololens': 0.9613334437735249},
+ 'map': 0.9613334437735249,
+ 'model_file': 'hololens_detection_model-ex-09--loss-4.01.h5',
+ 'using_iou': 0.5,
+ 'using_non_maximum_suppression': 0.5,
+ 'using_object_threshold': 0.3
+ }]
+ ```
+- **Multi Model Evaluation:** To evaluate all your saved models, simply pass in the path to the folder containing the models as the **model_path** as seen in the example below:
+ ```python
+ from imageai.Detection.Custom import DetectionModelTrainer
+
+ trainer = DetectionModelTrainer()
+ trainer.setModelTypeAsYOLOv3()
+ trainer.setDataDirectory(data_directory="hololens")
+ metrics = trainer.evaluateModel(model_path="hololens/models", json_path="hololens/json/detection_config.json", iou_threshold=0.5, object_threshold=0.3, nms_threshold=0.5)
+ ```
+ Sample Result:
+ ```
+ Model File: hololens/models/detection_model-ex-07--loss-4.42.h5
+ Using IoU : 0.5
+ Using Object Threshold : 0.3
+ Using Non-Maximum Suppression : 0.5
+ hololens: 0.9231
+ mAP: 0.9231
+ ===============================
+ Model File: hololens/models/detection_model-ex-10--loss-3.95.h5
+ Using IoU : 0.5
+ Using Object Threshold : 0.3
+ Using Non-Maximum Suppression : 0.5
+ hololens: 0.9725
+ mAP: 0.9725
+ ===============================
+ Model File: hololens/models/detection_model-ex-05--loss-5.26.h5
+ Using IoU : 0.5
+ Using Object Threshold : 0.3
+ Using Non-Maximum Suppression : 0.5
+ hololens: 0.9204
+ mAP: 0.9204
+ ===============================
+ Model File: hololens/models/detection_model-ex-03--loss-6.44.h5
+ Using IoU : 0.5
+ Using Object Threshold : 0.3
+ Using Non-Maximum Suppression : 0.5
+ hololens: 0.8120
+ mAP: 0.8120
+ ===============================
+ Model File: hololens/models/detection_model-ex-18--loss-2.96.h5
+ Using IoU : 0.5
+ Using Object Threshold : 0.3
+ Using Non-Maximum Suppression : 0.5
+ hololens: 0.9431
+ mAP: 0.9431
+ ===============================
+ Model File: hololens/models/detection_model-ex-17--loss-3.10.h5
+ Using IoU : 0.5
+ Using Object Threshold : 0.3
+ Using Non-Maximum Suppression : 0.5
+ hololens: 0.9404
+ mAP: 0.9404
+ ===============================
+ Model File: hololens/models/detection_model-ex-08--loss-4.16.h5
+ Using IoU : 0.5
+ Using Object Threshold : 0.3
+ Using Non-Maximum Suppression : 0.5
+ hololens: 0.9725
+ mAP: 0.9725
+ ===============================
+ ```
+ Let's see what those metrics look like:
+ ```
+ [{
+ 'average_precision': {'hololens': 0.9231334437735249},
+ 'map': 0.9231334437735249,
+ 'model_file': 'hololens/models/detection_model-ex-07--loss-4.42.h5',
+ 'using_iou': 0.5,
+ 'using_non_maximum_suppression': 0.5,
+ 'using_object_threshold': 0.3
+ },
+ {
+ 'average_precision': {'hololens': 0.9725334437735249},
+ 'map': 0.97251334437735249,
+ 'model_file': 'hololens/models/detection_model-ex-10--loss-3.95.h5',
+ 'using_iou': 0.5,
+ 'using_non_maximum_suppression': 0.5,
+ 'using_object_threshold': 0.3
+ },
+ {
+ 'average_precision': {'hololens': 0.92041334437735249},
+ 'map': 0.92041334437735249,
+ 'model_file': 'hololens/models/detection_model-ex-05--loss-5.26.h5',
+ 'using_iou': 0.5,
+ 'using_non_maximum_suppression': 0.5,
+ 'using_object_threshold': 0.3
+ },
+ {
+ 'average_precision': {'hololens': 0.81201334437735249},
+ 'map': 0.81201334437735249,
+ 'model_file': 'hololens/models/detection_model-ex-03--loss-6.44.h5',
+ 'using_iou': 0.5,
+ 'using_non_maximum_suppression': 0.5,
+ 'using_object_threshold': 0.3
+ },
+ {
+ 'average_precision': {'hololens': 0.94311334437735249},
+ 'map': 0.94311334437735249,
+ 'model_file': 'hololens/models/detection_model-ex-18--loss-2.96.h5',
+ 'using_iou': 0.5,
+ 'using_non_maximum_suppression': 0.5,
+ 'using_object_threshold': 0.3
+ },
+ {
+ 'average_precision': {'hololens': 0.94041334437735249},
+ 'map': 0.94041334437735249,
+ 'model_file': 'hololens/models/detection_model-ex-17--loss-3.10.h5',
+ 'using_iou': 0.5,
+ 'using_non_maximum_suppression': 0.5,
+ 'using_object_threshold': 0.3
+ },
+ {
+ 'average_precision': {'hololens': 0.97251334437735249},
+ 'map': 0.97251334437735249,
+ 'model_file': 'hololens/models/detection_model-ex-08--loss-4.16.h5',
+ 'using_iou': 0.5,
+ 'using_non_maximum_suppression': 0.5,
+ 'using_object_threshold': 0.3
+ }
+ ]
+ ```
+
+
+### Documentation
+
+
+We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:
+
+* Documentation - **English Version** [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
+* Documentation - **Chinese Version** [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)
+* Documentation - **French Version** [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)
+
+
+
+
+
+
diff --git a/imageai_tf_deprecated/Detection/Custom/CUSTOMVIDEODETECTION.md b/imageai_tf_deprecated/Detection/Custom/CUSTOMVIDEODETECTION.md
new file mode 100644
index 00000000..f1814776
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/CUSTOMVIDEODETECTION.md
@@ -0,0 +1,251 @@
+# ImageAI : Custom Video Object Detection, Tracking and Analysis
+
+A **DeepQuest AI** project [https://deepquestai.com](https://deepquestai.com)
+
+---
+
+### TABLE OF CONTENTS
+
+- :white_square_button: First Custom Video Object Detection
+- :white_square_button: Camera / Live Stream Video Detection
+- :white_square_button: Video Analysis
+- :white_square_button: Hiding/Showing Object Name and Probability
+- :white_square_button: Frame Detection Intervals
+- :white_square_button: Video Detection Timeout (NEW)
+- :white_square_button: Documentation
+
+
+ImageAI provides convenient, flexible and powerful methods to perform object detection on videos using your own **custom YOLOv3 model** and the corresponding **detection_config.json** generated during the training. This version of **ImageAI** provides commercial-grade video object detection features, which include but are not limited to device/IP camera inputs, per frame, per second, per minute and entire video analysis for storing in databases and/or real-time visualizations and for future insights.
+To test the custom video object detection, you can download a sample custom model we have trained to detect the Hololens headset and its **detection_config.json** file via the links below:
+- [**hololens-ex-60--loss-2.76.h5**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5) _(Size = 236 mb)_
+- [**detection_config.json**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json)
+
+
+Because video object detection is a compute-intensive task, we advise that you perform this experiment using a computer with an NVIDIA GPU and the GPU version of Tensorflow
+ installed. Performing Video Object Detection on a CPU will be slower than using an NVIDIA GPU powered computer. You can use Google Colab for this
+ experiment as it has an NVIDIA K80 GPU available for free.
+
+ Once you download the custom object detection model and JSON files, you should copy the model and the JSON files to your project folder where your .py files will be.
+ Then create a python file and give it a name; an example is FirstCustomVideoObjectDetection.py. Then write the code below into the python file:
+
+
+### FirstCustomVideoObjectDetection.py
+
+
+```python
+from imageai.Detection.Custom import CustomVideoObjectDetection
+import os
+
+execution_path = os.getcwd()
+
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("hololens-ex-60--loss-2.76.h5")
+video_detector.setJsonPath("detection_config.json")
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(input_file_path="holo1.mp4",
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20,
+ minimum_percentage_probability=40,
+ log_progress=True)
+```
+
+[**Input Video**](../../../data-videos/holo1.mp4)
+[](../../../data-videos/holo1.mp4)
+[**Output Video**](https://www.youtube.com/watch?v=4o5GyAR4Mpw)
+[](https://www.youtube.com/watch?v=4o5GyAR4Mpw)
+
+
+
+Let us make a breakdown of the object detection code that we used above.
+
+```python
+from imageai.Detection.Custom import CustomVideoObjectDetection
+import os
+
+execution_path = os.getcwd()
+```
+
+In the 3 lines above, we import the **ImageAI custom video object detection** class in the first line, import the **os** in the second line and obtain
+ the path to the folder where our python file runs.
+```python
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("hololens-ex-60--loss-2.76.h5")
+video_detector.setJsonPath("detection_config.json")
+video_detector.loadModel()
+```
+In the 5 lines above, we created a new instance of the `CustomVideoObjectDetection` class in the first line, set the model type to YOLOv3 in the second line,
+ set the model path to our custom YOLOv3 model file in the third line, specified the path to the model's corresponding **detection_config.json** in the fourth line and loaded the model in the fifth line.
+
+```python
+video_detector.detectObjectsFromVideo(input_file_path="holo1.mp4",
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20,
+ minimum_percentage_probability=40,
+ log_progress=True)
+```
+
+In the code above, we ran the `detectObjectsFromVideo()` function and passed in the path to our video, the path to the new
+ video (without the extension, it saves a .avi video by default) which the function will save, the number of frames per second (fps) that
+ we desire the output video to have and the option to log the progress of the detection in the console. Then the function returns the path to the saved video
+ which contains boxes and percentage probabilities rendered on objects detected in the video.
+
+
+### Camera / Live Stream Video Detection
+
+
+**ImageAI** now allows live-video detection with support for camera inputs. Using **OpenCV**'s **VideoCapture()** function, you can load live-video streams from a device camera, cameras connected by cable or IP cameras, and pass it into **ImageAI**'s **detectObjectsFromVideo()** function. All features that are supported for detecting objects in a video file are also available for detecting objects in a camera's live-video feed. Find below an example of detecting a live-video feed from the device camera.
+
+```python
+from imageai.Detection.Custom import CustomVideoObjectDetection
+import os
+import cv2
+
+execution_path = os.getcwd()
+camera = cv2.VideoCapture(0)
+
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("hololens-ex-60--loss-2.76.h5")
+video_detector.setJsonPath("detection_config.json")
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20,
+ minimum_percentage_probability=40,
+ log_progress=True)
+```
+
+The difference in the code above and the code for the detection of a video file is that we defined an **OpenCV VideoCapture** instance and loaded the default device camera into it. Then we parsed the camera we defined into the parameter **camera_input** which replaces the **input_file_path** that is used for video file.
+
+
+### Video Analysis
+
+
+**ImageAI** now provides commercial-grade video analysis in the Custom Video Object Detection class, for both video file inputs and camera inputs. This feature allows developers to obtain deep insights into any video processed with **ImageAI**. These insights can be visualized in real-time, stored in a NoSQL database for future review or analysis.
+
+For video analysis, the **detectObjectsFromVideo()** now allows you to state your own defined functions which will be executed for every frame, second and/or minute of the video processed, as well as state a function that will be executed at the end of a video detection. Once these functions are stated, they will receive raw but comprehensive analytical data on the index of the frame/second/minute, objects detected (name, percentage_probability and box_points), number of instances of each unique object detected and average number of occurrences of each unique object detected over a second/minute and the entire video.
+
+To obtain the video analysis, all you need to do is specify a function, state the corresponding parameters it will be receiving and parse the function name into the **per_frame_function**, **per_second_function**, **per_minute_function** and **video_complete_function** parameters in the detection function. Find below examples of video analysis functions.
+
+```python
+def forFrame(frame_number, output_array, output_count):
+ print("FOR FRAME " , frame_number)
+ print("Output for each object : ", output_array)
+ print("Output count for unique objects : ", output_count)
+ print("------------END OF A FRAME --------------")
+
+def forSeconds(second_number, output_arrays, count_arrays, average_output_count):
+ print("SECOND : ", second_number)
+ print("Array for the outputs of each frame ", output_arrays)
+ print("Array for output count for unique objects in each frame : ", count_arrays)
+ print("Output average count for unique objects in the last second: ", average_output_count)
+ print("------------END OF A SECOND --------------")
+
+def forMinute(minute_number, output_arrays, count_arrays, average_output_count):
+ print("MINUTE : ", minute_number)
+ print("Array for the outputs of each frame ", output_arrays)
+ print("Array for output count for unique objects in each frame : ", count_arrays)
+ print("Output average count for unique objects in the last minute: ", average_output_count)
+ print("------------END OF A MINUTE --------------")
+
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("hololens-ex-60--loss-2.76.h5")
+video_detector.setJsonPath("detection_config.json")
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20, per_second_function=forSeconds, per_frame_function = forFrame, per_minute_function= forMinute,
+ minimum_percentage_probability=40,
+ log_progress=True)
+```
+
+
+**ImageAI** also allows you to obtain complete analysis of the entire video processed. All you need is to define a function like the forSeconds or forMinute function and set the **video_complete_function** parameter into your **.detectObjectsFromVideo()** function. The same values for the per_second_function and per_minute_function will be returned. The difference is that no index will be returned and the other 3 values will be returned, and the 3 values will cover all frames in the video. Below is a sample function:
+```python
+def forFull(output_arrays, count_arrays, average_output_count):
+ #Perform action on the 3 parameters returned into the function
+
+
+video_detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ video_complete_function=forFull,
+ minimum_percentage_probability=40,
+ log_progress=True)
+
+```
+
+**FINAL NOTE ON VIDEO ANALYSIS**: **ImageAI** allows you to obtain the detected video frame as a Numpy array at each frame, second and minute function. All you need to do is specify one more parameter in your function and set **return_detected_frame=True** in your **detectObjectsFromVideo()** function. Once this is set, the extra parameter you specified in your function will be the Numpy array of the detected frame. See a sample below:
+
+```python
+def forFrame(frame_number, output_array, output_count, detected_frame):
+ print("FOR FRAME " , frame_number)
+ print("Output for each object : ", output_array)
+ print("Output count for unique objects : ", output_count)
+ print("Returned Objects is : ", type(detected_frame))
+ print("------------END OF A FRAME --------------")
+
+
+video_detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ per_frame_function=forFrame,
+ minimum_percentage_probability=40,
+ log_progress=True, return_detected_frame=True)
+```
+
+
+### Frame Detection Intervals
+
+
+The above video object detection tasks are optimized for frame-real-time object detections that ensure that objects in every frame
+of the video are detected. **ImageAI** provides you the option to adjust the video frame detections which can speed up
+your video detection process. When calling the `.detectObjectsFromVideo()`, you can
+specify at which frame interval detections should be made. By setting the **frame_detection_interval** parameter to be
+ equal to 5 or 20, that means the object detections in the video will be updated after 5 frames or 20 frames.
+If your output video **frames_per_second** is set to 20, that means the object detections in the video will
+ be updated once in every quarter of a second or every second. This is useful in case scenarios where the available
+ compute is less powerful and speeds of moving objects are low. This ensures you can have objects detected as second-real-time
+, half-a-second-real-time or whichever way suits your needs.
+
+
+### Custom Video Detection Timeout
+
+
+**ImageAI** now allows you to set a timeout in seconds for detection of objects in videos or camera live feed. To set a timeout for your video detection code, all you need to do is specify the `detection_timeout` parameter in the `detectObjectsFromVideo()` function to the number of desired seconds. In the example code below, we set `detection_timeout` to 120 seconds (2 minutes).
+
+
+```python
+from imageai.Detection.Custom import CustomVideoObjectDetection
+import os
+import cv2
+
+execution_path = os.getcwd()
+camera = cv2.VideoCapture(0)
+
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("hololens-ex-60--loss-2.76.h5")
+video_detector.setJsonPath("detection_config.json")
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20, minimum_percentage_probability=40,
+ detection_timeout=120)
+```
+
+
+### >> Documentation
+
+
+We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:
+
+* Documentation - **English Version** [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
+* Documentation - **Chinese Version** [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)
+* Documentation - **French Version** [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)
+
diff --git a/imageai_tf_deprecated/Detection/Custom/__init__.py b/imageai_tf_deprecated/Detection/Custom/__init__.py
new file mode 100644
index 00000000..e20e188e
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/__init__.py
@@ -0,0 +1,1395 @@
+import os
+import re
+import numpy as np
+import json
+from imageai.Detection.Custom.voc import parse_voc_annotation
+from imageai.Detection.YOLO.yolov3 import yolov3_main, yolov3_train, dummy_loss
+from imageai.Detection.Custom.generator import BatchGenerator
+from imageai.Detection.Custom.utils.utils import normalize, evaluate, makedirs
+from tensorflow.keras.callbacks import ReduceLROnPlateau
+from tensorflow.keras.optimizers import Adam
+from imageai.Detection.Custom.callbacks import CustomModelCheckpoint
+from imageai.Detection.Custom.utils.multi_gpu_model import multi_gpu_model
+from imageai.Detection.Custom.gen_anchors import generateAnchors
+import tensorflow as tf
+from tensorflow.keras.models import load_model
+from tensorflow.keras import Input
+from tensorflow.keras.callbacks import TensorBoard
+import tensorflow.keras.backend as K
+import cv2
+
+tf.config.run_functions_eagerly(True)
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+
+class DetectionModelTrainer:
+
+ """
+ This is the Detection Model training class, which allows you to train object detection models
+ on image datasets that are in Pascal VOC annotation format, using the YOLOv3.
+ """
+
+ def __init__(self):
+ self.__model_type = ""
+ self.__training_mode = True
+
+ self.__model_min_input_size = 288
+ self.__model_max_input_size = 448
+ self.__model_anchors = []
+ self.__inference_anchors = []
+ self.__json_directory = ""
+ self.__model_labels = []
+ self.__num_objects = 0
+ self.__pre_trained_model = ""
+
+ self.__train_images_folder = ""
+ self.__train_annotations_folder = ""
+ self.__train_cache_file = ""
+ self.__train_times = 8
+ self.__train_batch_size = 4
+ self.__train_learning_rate = 1e-4
+ self.__train_epochs = 100
+ self.__train_warmup_epochs = 3
+ self.__train_ignore_treshold = 0.5
+ self.__train_gpus = "0"
+ self.__train_grid_scales = [1, 1, 1]
+ self.__train_obj_scale = 5
+ self.__train_noobj_scale = 1
+ self.__train_xywh_scale = 1
+ self.__train_class_scale = 1
+ self.__model_directory = ""
+ self.__train_weights_name = ""
+ self.__train_debug = True
+ self.__logs_directory = ""
+
+ self.__validation_images_folder = ""
+ self.__validation_annotations_folder = ""
+ self.__validation_cache_file = ""
+ self.__validation_times = 1
+
+ def setModelTypeAsYOLOv3(self):
+ """
+ 'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model
+ for the training instance object .
+ :return:
+ """
+ self.__model_type = "yolov3"
+
+ def setDataDirectory(self, data_directory):
+
+ """
+
+ 'setDataDirectory()' is required to set the path to which the data/dataset to be used for
+ training is kept. The directory can have any name, but it must have 'train' and 'validation'
+ sub-directory. In the 'train' and 'validation' sub-directories, there must be 'images' and 'annotations'
+ sub-directories respectively. The 'images' folder will contain the pictures for the dataset and the
+ 'annotations' folder will contain the XML files with details of the annotations for each image in the
+ 'images folder'.
+
+ N.B: Strictly take note that the filenames (without the extension) of the pictures in the 'images folder'
+ must be the same as the filenames (without the extension) of their corresponding annotation XML files in
+ the 'annotations' folder.
+
+ The structure of the 'train' and 'validation' folder must be as follows:
+
+ >> train >> images >> img_1.jpg
+ >> images >> img_2.jpg
+ >> images >> img_3.jpg
+ >> annotations >> img_1.xml
+ >> annotations >> img_2.xml
+ >> annotations >> img_3.xml
+
+
+ >> validation >> images >> img_151.jpg
+ >> images >> img_152.jpg
+ >> images >> img_153.jpg
+ >> annotations >> img_151.xml
+ >> annotations >> img_152.xml
+ >> annotations >> img_153.xml
+
+ :param data_directory:
+ :return:
+ """
+
+ self.__train_images_folder = os.path.join(data_directory, "train", "images")
+ self.__train_annotations_folder = os.path.join(data_directory, "train", "annotations")
+ self.__validation_images_folder = os.path.join(data_directory, "validation", "images")
+ self.__validation_annotations_folder = os.path.join(data_directory, "validation", "annotations")
+
+ os.makedirs(os.path.join(data_directory, "cache"), exist_ok=True)
+ self.__train_cache_file = os.path.join(data_directory, "cache", "detection_train_data.pkl")
+ self.__validation_cache_file = os.path.join(data_directory, "cache", "detection_test_data.pkl")
+
+ os.makedirs(os.path.join(data_directory, "models"), exist_ok=True)
+
+ os.makedirs(os.path.join(data_directory, "json"), exist_ok=True)
+
+ os.makedirs(os.path.join(data_directory, "logs"), exist_ok=True)
+
+ self.__model_directory = os.path.join(data_directory, "models")
+ self.__train_weights_name = os.path.join(self.__model_directory, "detection_model-")
+ self.__json_directory = os.path.join(data_directory, "json")
+ self.__logs_directory = os.path.join(data_directory, "logs")
+
+ def setGpuUsage(self, train_gpus):
+ """
+ 'setGpuUsage' function allows you to set the GPUs to be used while training
+ train_gpu can be:
+ - an integer, indicating the number of GPUs to use
+ - a list of integers, indicating the id of the GPUs to be used
+ - a string, indicating the ids of the GPUs to be used, separated by commas
+ :param train_gpus: gpus where to run
+ :return:
+ """
+ # train_gpus, could be a string separated by comma, or a list of int or the number of GPUs to be used
+ if type(train_gpus) == str:
+ train_gpus = train_gpus.split(',')
+ if type(train_gpus) == int:
+ train_gpus = range(train_gpus)
+ # let it as a string separated by commas
+ self.__train_gpus = ','.join([str(gpu) for gpu in train_gpus])
+
+ def setTrainConfig(self, object_names_array, batch_size=4, num_experiments=100, train_from_pretrained_model=""):
+
+ """
+
+ 'setTrainConfig()' function allows you to set the properties for the training instances. It accepts the following values:
+
+ - object_names_array , this is an array of the names of the different objects in your dataset
+ - batch_size (optional), this is the batch size for the training instance
+ - num_experiments (optional), also known as epochs, it is the number of times the network will train on all the training dataset
+ - train_from_pretrained_model (optional), this is used to perform transfer learning by specifying the path to a pre-trained YOLOv3 model
+
+ :param object_names_array:
+ :param batch_size:
+ :param num_experiments:
+ :param train_from_pretrained_model:
+ :return:
+ """
+
+ # Remove cache files
+ if os.path.isfile(self.__train_cache_file) == True:
+ os.remove(self.__train_cache_file)
+
+ if os.path.isfile(self.__validation_cache_file) == True:
+ os.remove(self.__validation_cache_file)
+
+ self.__model_labels = sorted(object_names_array) # set labels BEFORE anchor generation, so generateAnchors sees them
+ self.__num_objects = len(object_names_array)
+
+ self.__model_anchors, self.__inference_anchors = generateAnchors(self.__train_annotations_folder,
+ self.__train_images_folder,
+ self.__train_cache_file, self.__model_labels)
+
+ self.__train_batch_size = batch_size
+ self.__train_epochs = num_experiments
+ self.__pre_trained_model = train_from_pretrained_model
+
+ json_data = dict()
+ json_data["labels"] = self.__model_labels
+ json_data["anchors"] = self.__inference_anchors
+
+ with open(os.path.join(self.__json_directory, "detection_config.json"), "w+") as json_file:
+ json.dump(json_data, json_file, indent=4, separators=(",", " : "),
+ ensure_ascii=True)
+
+ print("Detection configuration saved in ", os.path.join(self.__json_directory, "detection_config.json"))
+
+ def trainModel(self):
+
+ """
+ 'trainModel()' function starts the actual model training. Once the training starts, the training instance
+ creates 3 sub-folders in your dataset folder which are:
+
+ - json, where the JSON configuration file for using your trained model is stored
+ - models, where your trained models are stored once they are generated after each improved experiment
+ - cache, where temporary training configuration files are stored
+
+ :return:
+ """
+
+ train_ints, valid_ints, labels, max_box_per_image = self._create_training_instances(
+ self.__train_annotations_folder,
+ self.__train_images_folder,
+ self.__train_cache_file,
+ self.__validation_annotations_folder,
+ self.__validation_images_folder,
+ self.__validation_cache_file,
+ self.__model_labels
+
+ )
+ if self.__training_mode:
+ print('Training on: \t' + str(labels) + '')
+ print("Training with Batch Size: ", self.__train_batch_size)
+ print("Number of Training Samples: ", len(train_ints))
+ print("Number of Validation Samples: ", len(valid_ints))
+ print("Number of Experiments: ", self.__train_epochs)
+
+ ###############################
+ # Create the generators
+ ###############################
+ train_generator = BatchGenerator(
+ instances=train_ints,
+ anchors=self.__model_anchors,
+ labels=labels,
+ downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image=max_box_per_image,
+ batch_size=self.__train_batch_size,
+ min_net_size=self.__model_min_input_size,
+ max_net_size=self.__model_max_input_size,
+ shuffle=True,
+ jitter=0.3,
+ norm=normalize
+ )
+
+ valid_generator = BatchGenerator(
+ instances=valid_ints,
+ anchors=self.__model_anchors,
+ labels=labels,
+ downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image=max_box_per_image,
+ batch_size=self.__train_batch_size,
+ min_net_size=self.__model_min_input_size,
+ max_net_size=self.__model_max_input_size,
+ shuffle=True,
+ jitter=0.0,
+ norm=normalize
+ )
+
+ ###############################
+ # Create the model
+ ###############################
+ if os.path.exists(self.__pre_trained_model):
+ self.__train_warmup_epochs = 0
+ warmup_batches = self.__train_warmup_epochs * (self.__train_times * len(train_generator))
+
+ os.environ['CUDA_VISIBLE_DEVICES'] = self.__train_gpus
+ multi_gpu = [int(gpu) for gpu in self.__train_gpus.split(',')]
+
+ """train_model, infer_model = self._create_model(
+ nb_class=len(labels),
+ anchors=self.__model_anchors,
+ max_box_per_image=max_box_per_image,
+ max_grid=[self.__model_max_input_size, self.__model_max_input_size],
+ batch_size=self.__train_batch_size,
+ warmup_batches=warmup_batches,
+ ignore_thresh=self.__train_ignore_treshold,
+ multi_gpu=multi_gpu,
+ lr=self.__train_learning_rate,
+ grid_scales=self.__train_grid_scales,
+ obj_scale=self.__train_obj_scale,
+ noobj_scale=self.__train_noobj_scale,
+ xywh_scale=self.__train_xywh_scale,
+ class_scale=self.__train_class_scale,
+ )"""
+
+ train_model, infer_model = self._create_model(
+ nb_class=len(labels),
+ anchors=self.__model_anchors,
+ max_box_per_image=max_box_per_image,
+ max_grid=[self.__model_max_input_size, self.__model_max_input_size],
+ batch_size=self.__train_batch_size,
+ warmup_batches=warmup_batches,
+ ignore_thresh=self.__train_ignore_treshold,
+ multi_gpu=multi_gpu,
+ lr=self.__train_learning_rate,
+ grid_scales=self.__train_grid_scales,
+ obj_scale=self.__train_obj_scale,
+ noobj_scale=self.__train_noobj_scale,
+ xywh_scale=self.__train_xywh_scale,
+ class_scale=self.__train_class_scale,
+ )
+
+ ###############################
+ # Kick off the training
+ ###############################
+ callbacks = self._create_callbacks(self.__train_weights_name, infer_model)
+
+ train_model.fit_generator(
+ generator=train_generator,
+ steps_per_epoch=len(train_generator) * self.__train_times,
+ validation_data=valid_generator,
+ validation_steps=len(valid_generator) * self.__train_times,
+ epochs=self.__train_epochs + self.__train_warmup_epochs,
+ verbose=1,
+ callbacks=callbacks,
+ workers=4,
+ max_queue_size=8
+ )
+
+ def evaluateModel(self, model_path, json_path, batch_size=4, iou_threshold=0.5, object_threshold=0.2, nms_threshold=0.45):
+ """
+
+ 'evaluateModel()' is used to obtain the mAP metrics for your model(s). It accepts the following values:
+
+ - model_path ( model file or folder), this value can be the path to your model file or the path to the folder containing all your saved model files
+ - json_path , this is the path to the 'detection_config.json' file saved for the dataset during the training
+ - iou_threshold , this value is used to set the desired 'IoU' to obtain the mAP metrics for your model(s)
+ - object_threshold , this is used to set your desired minimum 'class score' to obtain the mAP metrics for your model(s)
+ - nms_threshold , this is used to set your desired 'Non-maximum suppression' to obtain the mAP metrics for your model(s)
+
+ :param model_path:
+ :param json_path:
+ :param batch_size:
+ :param iou_threshold:
+ :param object_threshold:
+ :param nms_threshold:
+ :return: list of dictionaries, containing one dict per evaluated model.
+ Each dict contains exactly the same metrics that are printed on standard output
+ """
+
+ self.__training_mode = False
+
+ with open(json_path, 'r') as json_file:
+ detection_model_json = json.load(json_file)
+
+ temp_anchor_array = []
+ new_anchor_array = []
+
+ temp_anchor_array.append(detection_model_json["anchors"][2])
+ temp_anchor_array.append(detection_model_json["anchors"][1])
+ temp_anchor_array.append(detection_model_json["anchors"][0])
+
+ for aa in temp_anchor_array:
+ for aaa in aa:
+ new_anchor_array.append(aaa)
+
+ self.__model_anchors = new_anchor_array
+ self.__model_labels = detection_model_json["labels"]
+ self.__num_objects = len(self.__model_labels)
+
+ self.__train_batch_size = batch_size
+ self.__train_epochs = 100
+
+ print("Starting Model evaluation....")
+
+ _, valid_ints, labels, max_box_per_image = self._create_training_instances(
+ self.__train_annotations_folder,
+ self.__train_images_folder,
+ self.__train_cache_file,
+ self.__validation_annotations_folder,
+ self.__validation_images_folder,
+ self.__validation_cache_file,
+ self.__model_labels
+
+ )
+
+ if len(valid_ints) == 0:
+ print('Validation samples were not provided.')
+ print('Please, check your validation samples are correctly provided:')
+ print('\tAnnotations: {}\n\tImages: {}'.format(self.__validation_annotations_folder,
+ self.__validation_images_folder))
+
+ valid_generator = BatchGenerator(
+ instances=valid_ints,
+ anchors=self.__model_anchors,
+ labels=labels,
+ downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image=max_box_per_image,
+ batch_size=self.__train_batch_size,
+ min_net_size=self.__model_min_input_size,
+ max_net_size=self.__model_max_input_size,
+ shuffle=True,
+ jitter=0.0,
+ norm=normalize
+ )
+
+ results = list()
+
+ if os.path.isfile(model_path):
+ # model_files must be a list containing the complete path to the files,
+ # if a file is given, then the list contains just this file
+ model_files = [model_path]
+ elif os.path.isdir(model_path):
+ # model_files must be a list containing the complete path to the files,
+ # if a folder is given, then the list contains the complete path to each file on that folder
+ model_files = sorted([os.path.join(model_path, file_name) for file_name in os.listdir(model_path)])
+ # sort the files to make sure we're always evaluating them on same order
+ else:
+ print('model_path must be the path to a .h5 file or a directory. Found {}'.format(model_path))
+ return results
+
+ for model_file in model_files:
+ if str(model_file).endswith(".h5"):
+ try:
+ infer_model = load_model(model_file)
+
+ ###############################
+ # Run the evaluation
+ ###############################
+ # compute mAP for all the classes
+ average_precisions = evaluate(infer_model, valid_generator, iou_threshold=iou_threshold,
+ obj_thresh=object_threshold, nms_thresh=nms_threshold)
+
+ result_dict = {
+ 'model_file': model_file,
+ 'using_iou': iou_threshold,
+ 'using_object_threshold': object_threshold,
+ 'using_non_maximum_suppression': nms_threshold,
+ 'average_precision': dict(),
+ 'evaluation_samples': len(valid_ints)
+ }
+ # print the score
+ print("Model File: ", model_file, '\n')
+ print("Evaluation samples: ", len(valid_ints))
+ print("Using IoU: ", iou_threshold)
+ print("Using Object Threshold: ", object_threshold)
+ print("Using Non-Maximum Suppression: ", nms_threshold)
+
+ for label, average_precision in average_precisions.items():
+ print(labels[label] + ': {:.4f}'.format(average_precision))
+ result_dict['average_precision'][labels[label]] = average_precision
+
+ print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))
+ result_dict['map'] = sum(average_precisions.values()) / len(average_precisions)
+ print("===============================")
+
+ results.append(result_dict)
+ except Exception as e:
+ print('skipping the evaluation of {} because following exception occurred: {}'.format(model_file, e))
+ continue
+ else:
+ print('skipping the evaluation of {} since it\'s not a .h5 file'.format(model_file))
+
+ return results
+
+ def _create_training_instances(self,
+ train_annot_folder,
+ train_image_folder,
+ train_cache,
+ valid_annot_folder,
+ valid_image_folder,
+ valid_cache,
+ labels,
+ ):
+
+ # Build the training/validation instance lists from Pascal-VOC annotations.
+ # Returns (train_ints, valid_ints, sorted_labels, max_box_per_image), or
+ # (None, None, None, None) when a requested label has no annotations.
+
+ # parse annotations of the training set
+ train_ints, train_labels = parse_voc_annotation(train_annot_folder, train_image_folder, train_cache, labels)
+
+ # parse annotations of the validation set, if any, otherwise split the training set
+
+ if os.path.exists(valid_annot_folder):
+ valid_ints, valid_labels = parse_voc_annotation(valid_annot_folder, valid_image_folder, valid_cache, labels)
+ print('Evaluating over {} samples taken from {}'.format(len(valid_ints),
+ os.path.dirname(valid_annot_folder)))
+ else:
+
+ # no explicit validation set: hold out 20% of the shuffled training set
+ train_portion = 0.8 # use 80% to train and the remaining 20% to evaluate
+ train_valid_split = int(round(train_portion * len(train_ints)))
+ # fixed seed so the train/validation split is reproducible across runs
+ np.random.seed(0)
+ np.random.shuffle(train_ints)
+
+ valid_ints = train_ints[train_valid_split:]
+ train_ints = train_ints[:train_valid_split]
+ print('Evaluating over {} samples taken as {:5.2f}% of the training set '
+ 'given at {}'.format(len(valid_ints),
+ (1 - train_portion)*100,
+ os.path.dirname(train_annot_folder)))
+
+ print('Training over {} samples given at {}'.format(len(train_ints), os.path.dirname(train_annot_folder)))
+
+ # compare the seen labels with the given labels in config.json
+ if len(labels) > 0:
+ overlap_labels = set(labels).intersection(set(train_labels.keys()))
+
+ # return None, None, None if some given label is not in the dataset
+ if len(overlap_labels) < len(labels):
+ if self.__training_mode:
+ print('Some labels have no annotations! Please revise the list of labels in your configuration.')
+ return None, None, None, None
+ else:
+ if self.__training_mode:
+ print('No labels are provided. Train on all seen labels.')
+ print(train_labels)
+
+ labels = train_labels.keys()
+
+ # widest per-image annotation count; sizes the model's ground-truth box buffer
+ max_box_per_image = max([len(inst['object']) for inst in (train_ints + valid_ints)])
+
+ return train_ints, valid_ints, sorted(labels), max_box_per_image
+
+ def _create_callbacks(self, saved_weights_name, model_to_save):
+
+ # Assemble the Keras callbacks used during training:
+ # - CustomModelCheckpoint: saves the single-GPU template model (not any
+ # multi-GPU wrapper) each time the training loss improves
+ # - ReduceLROnPlateau: decays the learning rate by 10x after 2 epochs
+ # with no loss improvement
+ # - TensorBoard: writes training logs to the configured logs directory
+ checkpoint = CustomModelCheckpoint(
+ model_to_save=model_to_save,
+ filepath=saved_weights_name + 'ex-{epoch:03d}--loss-{loss:08.3f}.h5',
+ monitor='loss',
+ verbose=0,
+ save_best_only=True,
+ mode='min',
+ period=1
+ )
+ reduce_on_plateau = ReduceLROnPlateau(
+ monitor='loss',
+ factor=0.1,
+ patience=2,
+ verbose=0,
+ mode='min',
+ # NOTE(review): 'epsilon' is the legacy name of 'min_delta' in old Keras;
+ # newer tf.keras rejects it -- confirm against the pinned Keras version
+ epsilon=0.01,
+ cooldown=0,
+ min_lr=0
+ )
+ tensor_board = TensorBoard(
+ log_dir=self.__logs_directory
+ )
+ return [checkpoint, reduce_on_plateau, tensor_board]
+
+ def _create_model(
+ self,
+ nb_class,
+ anchors,
+ max_box_per_image,
+ max_grid, batch_size,
+ warmup_batches,
+ ignore_thresh,
+ multi_gpu,
+ lr,
+ grid_scales,
+ obj_scale,
+ noobj_scale,
+ xywh_scale,
+ class_scale
+ ):
+ # Build the YOLOv3 training and inference models.
+ # Returns (train_model, infer_model): train_model is compiled with a dummy
+ # loss (the real YOLO loss is computed inside the graph by yolov3_train);
+ # infer_model shares weights and is used for prediction/evaluation.
+ if len(multi_gpu) > 1:
+ # multi-GPU: build the template model on CPU so weights live in host
+ # memory and can be shared by the per-GPU replicas
+ with tf.device('/cpu:0'):
+ template_model, infer_model = yolov3_train(
+ num_classes=nb_class,
+ anchors=anchors,
+ max_box_per_image=max_box_per_image,
+ max_grid=max_grid,
+ batch_size=batch_size // len(multi_gpu),
+ warmup_batches=warmup_batches,
+ ignore_thresh=ignore_thresh,
+ grid_scales=grid_scales,
+ obj_scale=obj_scale,
+ noobj_scale=noobj_scale,
+ xywh_scale=xywh_scale,
+ class_scale=class_scale
+ )
+ else:
+ template_model, infer_model = yolov3_train(
+ num_classes=nb_class,
+ anchors=anchors,
+ max_box_per_image=max_box_per_image,
+ max_grid=max_grid,
+ batch_size=batch_size,
+ warmup_batches=warmup_batches,
+ ignore_thresh=ignore_thresh,
+ grid_scales=grid_scales,
+ obj_scale=obj_scale,
+ noobj_scale=noobj_scale,
+ xywh_scale=xywh_scale,
+ class_scale=class_scale
+ )
+
+ # load the pretrained weight if exists, otherwise load the backend weight only
+
+ # a path shorter than 4 characters is treated as "no pretrained model given"
+ if len(self.__pre_trained_model) > 3:
+ if self.__training_mode:
+ print("Training with transfer learning from pretrained Model")
+ template_model.load_weights(self.__pre_trained_model, by_name=True)
+ else:
+ if self.__training_mode:
+ print("Pre-trained Model not provided. Transfer learning not in use.")
+ print("Training will start with 3 warmup experiments")
+
+ if len(multi_gpu) > 1:
+ train_model = multi_gpu_model(template_model, gpus=multi_gpu)
+ else:
+ train_model = template_model
+
+ optimizer = Adam(lr=lr, clipnorm=0.001)
+ train_model.compile(loss=dummy_loss, optimizer=optimizer)
+
+ return train_model, infer_model
+
+
+class CustomObjectDetection:
+
+ """
+ This is the object detection class for using your custom trained models. It supports your custom trained YOLOv3 model and allows you to perform object detection in images.
+ """
+
+ def __init__(self):
+ # model configuration; populated via the set* methods before loadModel()
+ self.__model_type = ""
+ self.__model_path = ""
+ self.__model_labels = []
+ self.__model_anchors = []
+ self.__detection_config_json_path = ""
+ # fixed network input resolution; images are resized to this square size
+ self.__input_size = 416
+ self.__object_threshold = 0.4
+ self.__nms_threshold = 0.4
+ self.__model = None
+ self.__detection_utils = CustomDetectionUtils(labels=[])
+
+ def setModelTypeAsYOLOv3(self):
+ """
+ 'setModelTypeAsYOLOv3' is used to set your custom detection model as YOLOv3
+ :return:
+ """
+ self.__model_type = "yolov3"
+
+ def setModelPath(self, detection_model_path):
+ """
+ 'setModelPath' is used to specify the filepath to your custom detection model
+ :param detection_model_path: path to the .h5 model file.
+ Usually is one of those under /models/detection_model-ex-ddd--loss-dddd.ddd.h5
+ :return: None
+ """
+ self.__model_path = detection_model_path
+
+ def setJsonPath(self, configuration_json):
+ """
+ 'setJsonPath' is used to set the filepath to the configuration JSON file for your custom detection model
+ :param configuration_json: path to the .json file. Usually it is /json/detection_config.json
+ :return: None
+ """
+ self.__detection_config_json_path = configuration_json
+
+ def loadModel(self):
+
+ """
+ 'loadModel' is used to load the model into the CustomObjectDetection class
+ :return: None
+ """
+
+ if self.__model_type == "yolov3":
+ # the config JSON carries the label list and anchor boxes produced at training time
+ detection_model_json = json.load(open(self.__detection_config_json_path))
+
+ self.__model_labels = detection_model_json["labels"]
+ self.__model_anchors = detection_model_json["anchors"]
+
+ self.__detection_utils = CustomDetectionUtils(labels=self.__model_labels)
+
+ # fully-convolutional graph: spatial dims are left as None, 3 anchor boxes per scale
+ self.__model = yolov3_main(Input(shape=(None, None, 3)), 3, len(self.__model_labels))
+
+ self.__model.load_weights(self.__model_path)
+
+ def detectObjectsFromImage(self, input_image="", output_image_path="", input_type="file", output_type="file",
+ extract_detected_objects=False, minimum_percentage_probability=50, nms_treshold=0.4,
+ display_percentage_probability=True, display_object_name=True, thread_safe=False):
+
+ """
+
+ 'detectObjectsFromImage()' function is used to detect objects observable in the given image:
+ * input_image , which can be a filepath or image numpy array in BGR
+ * output_image_path (only if output_type = file) , file path to the output image that will contain the detection boxes and label, if output_type="file"
+ * input_type (optional) , filepath/numpy array of the image. Acceptable values are "file" and "array"
+ * output_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file" and "array"
+ * extract_detected_objects (optional) , option to save each object detected individually as an image and return an array of the objects' image path.
+ * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
+ * nms_treshold (optional, 0.4 by default) , option to set the Non-maximum suppression for the detection
+ * display_percentage_probability (optional, True by default), option to show or hide the percentage probability of each object in the saved/returned detected image
+ * display_object_name (optional, True by default), option to show or hide the name of each object in the saved/returned detected image
+ * thread_safe (optional, False by default), enforce the loaded detection model works across all threads if set to true, made possible by forcing all Keras inference to run on the default graph
+
+
+ The values returned by this function depends on the parameters parsed. The possible values returnable
+ are stated as below
+ - If extract_detected_objects = False or at its default value and output_type = 'file' or
+ at its default value, you must parse in the 'output_image_path' as a string to the path you want
+ the detected image to be saved. Then the function will return:
+ 1. an array of dictionaries, with each dictionary corresponding to the objects
+ detected in the image. Each dictionary contains the following property:
+ * name (string)
+ * percentage_probability (float)
+ * box_points (list of x1,y1,x2 and y2 coordinates)
+
+ - If extract_detected_objects = False or at its default value and output_type = 'array' ,
+ Then the function will return:
+
+ 1. a numpy array of the detected image
+ 2. an array of dictionaries, with each dictionary corresponding to the objects
+ detected in the image. Each dictionary contains the following property:
+ * name (string)
+ * percentage_probability (float)
+ * box_points (list of x1,y1,x2 and y2 coordinates)
+
+ - If extract_detected_objects = True and output_type = 'file' or
+ at its default value, you must parse in the 'output_image_path' as a string to the path you want
+ the detected image to be saved. Then the function will return:
+ 1. an array of dictionaries, with each dictionary corresponding to the objects
+ detected in the image. Each dictionary contains the following property:
+ * name (string)
+ * percentage_probability (float)
+ * box_points (list of x1,y1,x2 and y2 coordinates)
+ 2. an array of string paths to the image of each object extracted from the image
+
+ - If extract_detected_objects = True and output_type = 'array', then the function will return:
+ 1. a numpy array of the detected image
+ 2. an array of dictionaries, with each dictionary corresponding to the objects
+ detected in the image. Each dictionary contains the following property:
+ * name (string)
+ * percentage_probability (float)
+ * box_points (list of x1,y1,x2 and y2 coordinates)
+ 3. an array of numpy arrays of each object detected in the image
+
+ :param input_image:
+ :param output_image_path:
+ :param input_type:
+ :param output_type:
+ :param extract_detected_objects:
+ :param minimum_percentage_probability:
+ :param nms_treshold:
+ :param display_percentage_probability:
+ :param display_object_name:
+ :param thread_safe:
+ :return image_frame:
+ :return output_objects_array:
+ :return detected_objects_image_array:
+ """
+
+ if self.__model is None:
+ raise ValueError("You must call the loadModel() function before making object detection.")
+ else:
+ if output_type == "file":
+ # from the image file, lets keep the directory and the filename, but remove its format
+ # if output_image_path is path/to/the/output/image.png
+ # then output_image_folder is path/to/the/output/image
+ # let's check if it is in the appropriated format soon to fail early
+ output_image_folder, n_subs = re.subn(r'\.(?:jpe?g|png|tif|webp|PPM|PGM)$', '', output_image_path, flags=re.I)
+ if n_subs == 0:
+ # if no substitution was done, the given output_image_path is not in a supported format,
+ # raise an error
+ raise ValueError("output_image_path must be the path where to write the image. "
+ "Therefore it must end as one the following: "
+ "'.jpg', '.png', '.tif', '.webp', '.PPM', '.PGM'. {} found".format(output_image_path))
+ elif extract_detected_objects:
+ # Results must be written as files and need to extract detected objects as images,
+ # let's create a folder to store the object's images
+ objects_dir = output_image_folder + "-objects"
+
+ os.makedirs(objects_dir, exist_ok=True)
+
+ # convert percentage (0-100) to the fractional threshold used internally
+ self.__object_threshold = minimum_percentage_probability / 100
+ self.__nms_threshold = nms_treshold
+
+ output_objects_array = []
+ detected_objects_image_array = []
+
+ if input_type == "file":
+ image = cv2.imread(input_image)
+ elif input_type == "array":
+ image = input_image
+ else:
+ raise ValueError("input_type must be 'file' or 'array'. {} found".format(input_type))
+
+ # keep an untouched copy at original resolution for drawing and cropping
+ image_frame = image.copy()
+
+ height, width, channels = image.shape
+
+ # preprocess: resize to the network input size, BGR->RGB, scale to [0, 1]
+ image = cv2.resize(image, (self.__input_size, self.__input_size))
+
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+ image = image.astype("float32") / 255.
+
+ # expand the image to batch
+ image = np.expand_dims(image, 0)
+
+ if self.__model_type == "yolov3":
+ if thread_safe == True:
+ # pin inference to the default graph so the model works across threads
+ with K.get_session().graph.as_default():
+ yolo_results = self.__model.predict(image)
+ else:
+ yolo_results = self.__model.predict(image)
+
+ boxes = list()
+
+ # decode each of the three YOLOv3 output scales with its matching anchors
+ for idx, result in enumerate(yolo_results):
+ box_set = self.__detection_utils.decode_netout(result[0], self.__model_anchors[idx],
+ self.__object_threshold, self.__input_size,
+ self.__input_size)
+ boxes += box_set
+
+ # map box coordinates from network space back to the original image size
+ self.__detection_utils.correct_yolo_boxes(boxes, height, width, self.__input_size, self.__input_size)
+
+ self.__detection_utils.do_nms(boxes, self.__nms_threshold)
+
+ all_boxes, all_labels, all_scores = self.__detection_utils.get_boxes(boxes, self.__model_labels,
+ self.__object_threshold)
+
+ for object_box, object_label, object_score in zip(all_boxes, all_labels, all_scores):
+ each_object_details = dict()
+ each_object_details["name"] = object_label
+ each_object_details["percentage_probability"] = object_score
+
+ # clamp negative coordinates to the image boundary
+ if object_box.xmin < 0:
+ object_box.xmin = 0
+ if object_box.ymin < 0:
+ object_box.ymin = 0
+
+ each_object_details["box_points"] = [object_box.xmin, object_box.ymin, object_box.xmax, object_box.ymax]
+ output_objects_array.append(each_object_details)
+
+ # draw on a copy so image_frame stays clean for object extraction below
+ drawn_image = self.__detection_utils.draw_boxes_and_caption(image_frame.copy(), all_boxes, all_labels,
+ all_scores, show_names=display_object_name,
+ show_percentage=display_percentage_probability)
+
+ if extract_detected_objects:
+
+ for cnt, each_object in enumerate(output_objects_array):
+
+ # crop [y1:y2, x1:x2] from the clean original-resolution frame
+ splitted_image = image_frame[each_object["box_points"][1]:each_object["box_points"][3],
+ each_object["box_points"][0]:each_object["box_points"][2]]
+ if output_type == "file":
+ splitted_image_path = os.path.join(objects_dir, "{}-{:05d}.jpg".format(each_object["name"],
+ cnt))
+
+ cv2.imwrite(splitted_image_path, splitted_image)
+ detected_objects_image_array.append(splitted_image_path)
+ elif output_type == "array":
+ detected_objects_image_array.append(splitted_image.copy())
+
+ if output_type == "file":
+ # we already validated that the output_image_path is a supported by OpenCV one
+ cv2.imwrite(output_image_path, drawn_image)
+
+ if extract_detected_objects:
+ if output_type == "file":
+ return output_objects_array, detected_objects_image_array
+ elif output_type == "array":
+ return drawn_image, output_objects_array, detected_objects_image_array
+
+ else:
+ if output_type == "file":
+ return output_objects_array
+ elif output_type == "array":
+ return drawn_image, output_objects_array
+
+
+class CustomVideoObjectDetection:
+
+
+ """
+
+ This is the object detection class for videos and camera live stream inputs using your custom trained detection models. It provides support for your custom YOLOv3 models.
+
+ """
+
+ def __init__(self):
+ # model configuration; populated via the set* methods before loadModel()
+ self.__model_type = ""
+ self.__model_path = ""
+ self.__model_labels = []
+ self.__model_anchors = []
+ self.__detection_config_json_path = ""
+ self.__model_loaded = False
+ self.__input_size = 416
+ self.__object_threshold = 0.4
+ self.__nms_threshold = 0.4
+ # holds a CustomObjectDetection instance after loadModel()
+ self.__detector = []
+ self.__detection_utils = CustomDetectionUtils(labels=[])
+
+ def setModelTypeAsYOLOv3(self):
+
+ """
+ 'setModelTypeAsYOLOv3' is used to set your custom detection model as YOLOv3
+ :return:
+ """
+
+ self.__model_type = "yolov3"
+
+
+ def setModelPath(self, detection_model_path):
+ """
+ 'setModelPath' is used to specify the filepath to your custom detection model
+
+ :param detection_model_path: path to the trained .h5 model file
+ :return:
+ """
+ self.__model_path = detection_model_path
+
+
+ def setJsonPath(self, configuration_json):
+ """
+ 'setJsonPath' is used to set the filepath to the configuration JSON file for your custom detection model
+
+ :param configuration_json: path to the detection_config.json file
+ :return:
+ """
+ self.__detection_config_json_path = configuration_json
+
+ def loadModel(self):
+ """
+ 'loadModel' is used to load the model into the CustomVideoObjectDetection class
+
+ :return:
+ """
+
+ # idempotent: the underlying image detector is created only once
+ if (self.__model_loaded == False):
+ if(self.__model_type == "yolov3"):
+ detector = CustomObjectDetection()
+ detector.setModelTypeAsYOLOv3()
+ detector.setModelPath(self.__model_path)
+ detector.setJsonPath(self.__detection_config_json_path)
+ detector.loadModel()
+
+ self.__detector = detector
+ self.__model_loaded = True
+
+
+ def detectObjectsFromVideo(self, input_file_path="", camera_input=None, output_file_path="", frames_per_second=20,
+ frame_detection_interval=1, minimum_percentage_probability=50, log_progress=False,
+ display_percentage_probability=True, display_object_name=True, save_detected_video=True,
+ per_frame_function=None, per_second_function=None, per_minute_function=None,
+ video_complete_function=None, return_detected_frame=False, detection_timeout = None):
+
+
+
+
+ """
+
+ 'detectObjectsFromVideo()' function is used to detect objects observable in the given video path or a camera input:
+ * input_file_path , which is the file path to the input video. It is required only if 'camera_input' is not set
+ * camera_input , allows you to parse in camera input for live video detections
+ * output_file_path , which is the path to the output video. It is required only if 'save_detected_video' is not set to False
+ * frames_per_second , which is the number of frames to be used in the output video
+ * frame_detection_interval (optional, 1 by default) , which is the intervals of frames that will be detected.
+ * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
+ * log_progress (optional) , which states if the progress of the frame processed is to be logged to console
+ * display_percentage_probability (optional), can be used to hide or show probability scores on the detected video frames
+ * display_object_name (optional), can be used to show or hide object names on the detected video frames
+ * save_detected_video (optional, True by default), can be set to or not to save the detected video
+ * per_frame_function (optional), this parameter allows you to parse in a function you will want to execute after each frame of the video is detected. If this parameter is set to a function, after every video frame is detected, the function will be executed with the following values parsed into it:
+ -- position number of the frame
+ -- an array of dictionaries, with each dictionary corresponding to each object detected. Each dictionary contains 'name', 'percentage_probability' and 'box_points'
+ -- a dictionary with keys being the name of each unique objects and value are the number of instances of the object present
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed as the fourth value into the function
+
+ * per_second_function (optional), this parameter allows you to parse in a function you will want to execute after each second of the video is detected. If this parameter is set to a function, after every second of a video is detected, the function will be executed with the following values parsed into it:
+ -- position number of the second
+ -- an array of dictionaries whose keys are position number of each frame present in the last second , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+ -- an array of dictionaries, with each dictionary corresponding to each frame in the past second, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+ -- a dictionary with its keys being the name of each unique object detected throughout the past second, and the key values are the average number of instances of the object found in all the frames contained in the past second
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
+ as the fifth value into the function
+
+ * per_minute_function (optional), this parameter allows you to parse in a function you will want to execute after each minute of the video is detected. If this parameter is set to a function, after every minute of a video is detected, the function will be executed with the following values parsed into it:
+ -- position number of the minute
+ -- an array of dictionaries whose keys are position number of each frame present in the last minute , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+
+ -- an array of dictionaries, with each dictionary corresponding to each frame in the past minute, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+
+ -- a dictionary with its keys being the name of each unique object detected throughout the past minute, and the key values are the average number of instances of the object found in all the frames contained in the past minute
+
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed as the fifth value into the function
+
+ * video_complete_function (optional), this parameter allows you to parse in a function you will want to execute after all of the video frames have been detected. If this parameter is set to a function, after all of frames of a video is detected, the function will be executed with the following values parsed into it:
+ -- an array of dictionaries whose keys are position number of each frame present in the entire video , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+ -- an array of dictionaries, with each dictionary corresponding to each frame in the entire video, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+ -- a dictionary with its keys being the name of each unique object detected throughout the entire video, and the key values are the average number of instances of the object found in all the frames contained in the entire video
+
+ * return_detected_frame (optionally, False by default), option to return the last detected video frame into the per_frame_function, per_second_function or per_minute_function
+
+ * detection_timeout (optionally, None by default), option to state the number of seconds of a video that should be detected after which the detection function stop processing the video
+
+ :param input_file_path:
+ :param camera_input:
+ :param output_file_path:
+ :param frames_per_second:
+ :param frame_detection_interval:
+ :param minimum_percentage_probability:
+ :param log_progress:
+ :param display_percentage_probability:
+ :param display_object_name:
+ :param save_detected_video:
+ :param per_frame_function:
+ :param per_second_function:
+ :param per_minute_function:
+ :param video_complete_function:
+ :param return_detected_frame:
+ :param detection_timeout:
+ :return output_video_filepath:
+ :return counting:
+ :return output_objects_array:
+ :return output_objects_count:
+ :return detected_copy:
+ :return this_second_output_object_array:
+ :return this_second_counting_array:
+ :return this_second_counting:
+ :return this_minute_output_object_array:
+ :return this_minute_counting_array:
+ :return this_minute_counting:
+ :return this_video_output_object_array:
+ :return this_video_counting_array:
+ :return this_video_counting:
+ """
+
+ # per-frame detections and per-frame object counts, keyed by frame number (1-based)
+ output_frames_dict = {}
+ output_frames_count_dict = {}
+
+ input_video = cv2.VideoCapture(input_file_path)
+ # a live camera input, when given, takes precedence over the file path
+ if (camera_input != None):
+ input_video = camera_input
+
+ output_video_filepath = output_file_path + '.avi'
+
+ # property ids 3/4 are CAP_PROP_FRAME_WIDTH / CAP_PROP_FRAME_HEIGHT
+ frame_width = int(input_video.get(3))
+ frame_height = int(input_video.get(4))
+ output_video = cv2.VideoWriter(output_video_filepath, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
+ frames_per_second,
+ (frame_width, frame_height))
+
+ counting = 0
+ predicted_numbers = None
+ scores = None
+ detections = None
+
+
+ detection_timeout_count = 0
+ video_frames_count = 0
+
+
+ if(self.__model_type == "yolov3"):
+
+
+
+ while (input_video.isOpened()):
+ ret, frame = input_video.read()
+
+ if (ret == True):
+
+ detected_frame = frame.copy()
+
+ # track elapsed video seconds to honor detection_timeout
+ video_frames_count += 1
+ if (detection_timeout != None):
+ if ((video_frames_count % frames_per_second) == 0):
+ detection_timeout_count += 1
+
+ if (detection_timeout_count >= detection_timeout):
+ break
+
+ output_objects_array = []
+
+ counting += 1
+
+ if (log_progress == True):
+ print("Processing Frame : ", str(counting))
+
+
+
+ # only run detection on the first frame and every Nth frame thereafter
+ check_frame_interval = counting % frame_detection_interval
+
+ if (counting == 1 or check_frame_interval == 0):
+ try:
+ detected_frame, output_objects_array = self.__detector.detectObjectsFromImage(
+ input_image=frame, input_type="array", output_type="array",
+ minimum_percentage_probability=minimum_percentage_probability,
+ display_percentage_probability=display_percentage_probability,
+ display_object_name=display_object_name)
+ except:
+ # NOTE(review): bare except silently drops detection failures for
+ # this frame; the undetected frame is still written to the output
+ None
+
+
+ output_frames_dict[counting] = output_objects_array
+
+ # tally instances per object name for this frame
+ output_objects_count = {}
+ for eachItem in output_objects_array:
+ eachItemName = eachItem["name"]
+ try:
+ output_objects_count[eachItemName] = output_objects_count[eachItemName] + 1
+ except:
+ output_objects_count[eachItemName] = 1
+
+ output_frames_count_dict[counting] = output_objects_count
+
+
+ if (save_detected_video == True):
+ output_video.write(detected_frame)
+
+ if (counting == 1 or check_frame_interval == 0):
+ if (per_frame_function != None):
+ if (return_detected_frame == True):
+ per_frame_function(counting, output_objects_array, output_objects_count,
+ detected_frame)
+ elif (return_detected_frame == False):
+ per_frame_function(counting, output_objects_array, output_objects_count)
+
+ if (per_second_function != None):
+ if (counting != 1 and (counting % frames_per_second) == 0):
+
+ this_second_output_object_array = []
+ this_second_counting_array = []
+ this_second_counting = {}
+
+ # collect detections/counts for the frames of the past second
+ for aa in range(counting):
+ if (aa >= (counting - frames_per_second)):
+ this_second_output_object_array.append(output_frames_dict[aa + 1])
+ this_second_counting_array.append(output_frames_count_dict[aa + 1])
+
+ for eachCountingDict in this_second_counting_array:
+ for eachItem in eachCountingDict:
+ try:
+ this_second_counting[eachItem] = this_second_counting[eachItem] + \
+ eachCountingDict[eachItem]
+ except:
+ this_second_counting[eachItem] = eachCountingDict[eachItem]
+
+ # average (integer) instance count per object over the past second
+ for eachCountingItem in this_second_counting:
+ this_second_counting[eachCountingItem] = int(this_second_counting[eachCountingItem] / frames_per_second)
+
+ if (return_detected_frame == True):
+ per_second_function(int(counting / frames_per_second),
+ this_second_output_object_array, this_second_counting_array,
+ this_second_counting, detected_frame)
+
+ elif (return_detected_frame == False):
+ per_second_function(int(counting / frames_per_second),
+ this_second_output_object_array, this_second_counting_array,
+ this_second_counting)
+
+ if (per_minute_function != None):
+
+ if (counting != 1 and (counting % (frames_per_second * 60)) == 0):
+
+ this_minute_output_object_array = []
+ this_minute_counting_array = []
+ this_minute_counting = {}
+
+ # collect detections/counts for the frames of the past minute
+ for aa in range(counting):
+ if (aa >= (counting - (frames_per_second * 60))):
+ this_minute_output_object_array.append(output_frames_dict[aa + 1])
+ this_minute_counting_array.append(output_frames_count_dict[aa + 1])
+
+ for eachCountingDict in this_minute_counting_array:
+ for eachItem in eachCountingDict:
+ try:
+ this_minute_counting[eachItem] = this_minute_counting[eachItem] + \
+ eachCountingDict[eachItem]
+ except:
+ this_minute_counting[eachItem] = eachCountingDict[eachItem]
+
+ # average (integer) instance count per object over the past minute
+ for eachCountingItem in this_minute_counting:
+ this_minute_counting[eachCountingItem] = int(this_minute_counting[eachCountingItem] / (frames_per_second * 60))
+
+ if (return_detected_frame == True):
+ per_minute_function(int(counting / (frames_per_second * 60)),
+ this_minute_output_object_array, this_minute_counting_array,
+ this_minute_counting, detected_frame)
+
+ elif (return_detected_frame == False):
+ per_minute_function(int(counting / (frames_per_second * 60)),
+ this_minute_output_object_array, this_minute_counting_array,
+ this_minute_counting)
+
+
+ else:
+ break
+
+ if (video_complete_function != None):
+
+ this_video_output_object_array = []
+ this_video_counting_array = []
+ this_video_counting = {}
+
+ # aggregate detections/counts over every processed frame
+ for aa in range(counting):
+ this_video_output_object_array.append(output_frames_dict[aa + 1])
+ this_video_counting_array.append(output_frames_count_dict[aa + 1])
+
+ for eachCountingDict in this_video_counting_array:
+ for eachItem in eachCountingDict:
+ try:
+ this_video_counting[eachItem] = this_video_counting[eachItem] + \
+ eachCountingDict[eachItem]
+ except:
+ this_video_counting[eachItem] = eachCountingDict[eachItem]
+
+ # note: whole-video averages are floats, unlike the per-second/minute ints
+ for eachCountingItem in this_video_counting:
+ this_video_counting[eachCountingItem] = this_video_counting[
+ eachCountingItem] / counting
+
+ video_complete_function(this_video_output_object_array, this_video_counting_array,
+ this_video_counting)
+
+ input_video.release()
+ output_video.release()
+
+ if (save_detected_video == True):
+ return output_video_filepath
+
+
+class BoundBox:
+ # A single detection box in corner form (xmin, ymin, xmax, ymax) with an
+ # objectness score and per-class probabilities. The winning label and its
+ # score are computed lazily and cached (-1 means "not yet computed").
+ def __init__(self, xmin, ymin, xmax, ymax, objness=None, classes=None):
+ self.xmin = xmin
+ self.ymin = ymin
+ self.xmax = xmax
+ self.ymax = ymax
+ self.objness = objness
+ self.classes = classes
+ self.label = -1
+ self.score = -1
+
+ def get_label(self):
+ # lazily resolve and cache the index of the highest-probability class
+ if self.label == -1:
+ self.label = np.argmax(self.classes)
+
+ return self.label
+
+ def get_score(self):
+ # lazily resolve and cache the probability of the winning class
+ if self.score == -1:
+ self.score = self.classes[self.get_label()]
+
+ return self.score
+
+
+class CustomDetectionUtils:
+ # Post-processing helpers for custom YOLOv3 detection: decoding raw network
+ # output into boxes, rescaling them to image coordinates, non-maximum
+ # suppression, thresholded box selection, and drawing annotated results.
+ def __init__(self, labels):
+ self.__labels = labels
+ self.__colors = []
+
+ # one random-but-fixed RGB color per label for drawing boxes
+ for i in range(len(labels)):
+ color_space_values = np.random.randint(50, 255, size=(3,))
+ red, green, blue = color_space_values
+ red, green, blue = int(red), int(green), int(blue)
+ self.__colors.append([red, green, blue])
+
+ @staticmethod
+ def _sigmoid(x):
+ # logistic sigmoid, applied element-wise to numpy arrays
+ return 1. / (1. + np.exp(-x))
+
+ def decode_netout(self, netout, anchors, obj_thresh, net_h, net_w):
+ # Decode one YOLOv3 output scale into a list of BoundBox objects with
+ # normalized (0-1) coordinates; boxes below obj_thresh are skipped.
+ grid_h, grid_w = netout.shape[:2]
+ nb_box = 3
+ netout = netout.reshape((grid_h, grid_w, nb_box, -1))
+ nb_class = netout.shape[-1] - 5
+ boxes = []
+ # sigmoid the x/y offsets and the objectness + class logits, then weight
+ # class scores by objectness and zero out those below the threshold
+ netout[..., :2] = self._sigmoid(netout[..., :2])
+ netout[..., 4:] = self._sigmoid(netout[..., 4:])
+ netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:]
+ netout[..., 5:] *= netout[..., 5:] > obj_thresh
+
+ for row in range(grid_h):
+ for col in range(grid_w):
+ for b in range(nb_box):
+ # 4th element is objectness score
+ objectness = netout[row, col, b, 4]
+
+ if objectness <= obj_thresh:
+ continue
+
+ # first 4 elements are x, y, w, and h
+ x, y, w, h = netout[row, col, b, :4]
+ x = (col + x) / grid_w # center position, unit: image width
+ y = (row + y) / grid_h # center position, unit: image height
+ w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
+ h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
+ # last elements are class probabilities
+ classes = netout[row, col, b, 5:]
+ box = BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, objectness, classes)
+ boxes.append(box)
+
+ return boxes
+
+ @staticmethod
+ def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
+ # Scale normalized box coordinates back to pixel coordinates of the
+ # original image, mutating the boxes in place. new_w == net_w here, so
+ # the offset/scale terms reduce to a plain rescale by image size.
+ new_w, new_h = net_w, net_h
+ for i in range(len(boxes)):
+ x_offset, x_scale = (net_w - new_w) / 2. / net_w, float(new_w) / net_w
+ y_offset, y_scale = (net_h - new_h) / 2. / net_h, float(new_h) / net_h
+ boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
+ boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
+ boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
+ boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)
+
+ def _interval_overlap(self, interval_a, interval_b):
+ # Length of the overlap between two 1-D intervals [x1, x2] and [x3, x4];
+ # 0 when they are disjoint.
+ x1, x2 = interval_a
+ x3, x4 = interval_b
+ if x3 < x1:
+ if x4 < x1:
+ return 0
+ else:
+ return min(x2, x4) - x1
+ else:
+ if x2 < x3:
+ return 0
+ else:
+ return min(x2, x4) - x3
+
+ def bbox_iou(self, box1, box2):
+ # Intersection-over-union of two BoundBox objects; 0.0 when the union
+ # is zero (degenerate boxes) instead of raising ZeroDivisionError.
+ intersect_w = self._interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
+ intersect_h = self._interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
+ intersect = intersect_w * intersect_h
+ w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin
+ w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin
+ union = w1 * h1 + w2 * h2 - intersect
+
+ try:
+ result = float(intersect) / float(union)
+ return result
+ except:
+ return 0.0
+
+ def do_nms(self, boxes, nms_thresh):
+ # Per-class non-maximum suppression, in place: for each class, zero the
+ # class score of any box overlapping a higher-scoring box by >= nms_thresh.
+ if len(boxes) > 0:
+ nb_class = len(boxes[0].classes)
+ else:
+ return
+
+ for c in range(nb_class):
+ # indices sorted by descending class score
+ sorted_indices = np.argsort([-box.classes[c] for box in boxes])
+
+ for i in range(len(sorted_indices)):
+ index_i = sorted_indices[i]
+
+ if boxes[index_i].classes[c] == 0: continue
+
+ for j in range(i + 1, len(sorted_indices)):
+ index_j = sorted_indices[j]
+
+ if self.bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
+ boxes[index_j].classes[c] = 0
+
+ def get_boxes(self, boxes, labels, thresh):
+ # Collect (box, label, score-percentage) triples for every class whose
+ # score exceeds thresh; a single box may yield multiple labels.
+ v_boxes, v_labels, v_scores = list(), list(), list()
+ # enumerate all boxes
+ for box in boxes:
+ # enumerate all possible labels
+ for i in range(len(labels)):
+ # check if the threshold for this label is high enough
+ if box.classes[i] > thresh:
+ v_boxes.append(box)
+ v_labels.append(labels[i])
+ v_scores.append(box.classes[i] * 100)
+ # don't break, many labels may trigger for one box
+ return v_boxes, v_labels, v_scores
+
+ def label_color(self, label):
+ """ Return a color from the set of colors generated at construction time.
+
+ Args
+ label: The label index to get the color for.
+
+ Returns
+ A list of three values representing a RGB color.
+
+ If no color is defined for the given label index, the color green is returned.
+ """
+ if label < len(self.__colors):
+ return self.__colors[label]
+ else:
+ return 0, 255, 0
+
+ def draw_boxes_and_caption(self, image_frame, v_boxes, v_labels, v_scores, show_names=False, show_percentage=False):
+ # Draw each box on image_frame with its label color and an optional
+ # "name : score" caption; returns the annotated frame.
+ for i in range(len(v_boxes)):
+ box = v_boxes[i]
+ y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
+ width, height = x2 - x1, y2 - y1
+ class_color = self.label_color(self.__labels.index(v_labels[i]))
+
+ image_frame = cv2.rectangle(image_frame, (x1, y1), (x2, y2), class_color, 2)
+
+ label = ""
+ if show_names and show_percentage:
+ label = "%s : %.3f" % (v_labels[i], v_scores[i])
+ elif show_names:
+ label = "%s" % (v_labels[i])
+ elif show_percentage:
+ label = "%.3f" % (v_scores[i])
+
+ if show_names or show_percentage:
+ b = np.array([x1, y1, x2, y2]).astype(int)
+ # thick dark text underneath, thin white text on top, for legibility
+ cv2.putText(image_frame, label, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (200, 0, 0), 3)
+ cv2.putText(image_frame, label, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 2)
+
+ return image_frame
diff --git a/imageai_tf_deprecated/Detection/Custom/callbacks.py b/imageai_tf_deprecated/Detection/Custom/callbacks.py
new file mode 100644
index 00000000..e487c80c
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/callbacks.py
@@ -0,0 +1,71 @@
+from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
+import tensorflow as tf
+import numpy as np
+import warnings
+
+class CustomTensorBoard(TensorBoard):
+ """ to log the loss after each batch
+ """
+ def __init__(self, log_every=1, **kwargs):
+ super(CustomTensorBoard, self).__init__(**kwargs)
+ self.log_every = log_every
+ self.counter = 0
+
+ def on_batch_end(self, batch, logs=None):
+ self.counter+=1
+ if self.counter%self.log_every==0:
+ for name, value in logs.items():
+ if name in ['batch', 'size']:
+ continue
+ summary = tf.Summary()
+ summary_value = summary.value.add()
+ summary_value.simple_value = value.item()
+ summary_value.tag = name
+ self.writer.add_summary(summary, self.counter)
+ self.writer.flush()
+
+ super(CustomTensorBoard, self).on_batch_end(batch, logs)
+
+class CustomModelCheckpoint(ModelCheckpoint):
+ """ to save the template model, not the multi-GPU model
+ """
+ def __init__(self, model_to_save, **kwargs):
+ super(CustomModelCheckpoint, self).__init__(**kwargs)
+ self.model_to_save = model_to_save
+
+ def on_epoch_end(self, epoch, logs=None):
+ logs = logs or {}
+ self.epochs_since_last_save += 1
+ if self.epochs_since_last_save >= self.period:
+ self.epochs_since_last_save = 0
+ filepath = self.filepath.format(epoch=epoch + 1, **logs)
+ if self.save_best_only:
+ current = logs.get(self.monitor)
+ if current is None:
+ warnings.warn('Can save best model only with %s available, '
+ 'skipping.' % (self.monitor), RuntimeWarning)
+ else:
+ if self.monitor_op(current, self.best):
+ if self.verbose > 0:
+ print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
+ ' saving model to %s'
+ % (epoch + 1, self.monitor, self.best,
+ current, filepath))
+ self.best = current
+ if self.save_weights_only:
+ self.model_to_save.save_weights(filepath, overwrite=True)
+ else:
+ self.model_to_save.save(filepath, overwrite=True)
+ else:
+ if self.verbose > 0:
+ print('\nEpoch %05d: %s did not improve from %0.5f' %
+ (epoch + 1, self.monitor, self.best))
+ else:
+ if self.verbose > 0:
+ print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
+ if self.save_weights_only:
+ self.model_to_save.save_weights(filepath, overwrite=True)
+ else:
+ self.model_to_save.save(filepath, overwrite=True)
+
+ super(CustomModelCheckpoint, self).on_batch_end(epoch, logs)
\ No newline at end of file
diff --git a/imageai_tf_deprecated/Detection/Custom/evaluate.py b/imageai_tf_deprecated/Detection/Custom/evaluate.py
new file mode 100644
index 00000000..0edf7bba
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/evaluate.py
@@ -0,0 +1,66 @@
+#! /usr/bin/env python
+
+import argparse
+import os
+import json
+from imageai.Detection.Custom.voc import parse_voc_annotation
+from imageai.Detection.Custom.generator import BatchGenerator
+from imageai.Detection.Custom.utils.utils import normalize, evaluate
+from keras.models import load_model
+
+
+def _main_(args):
+ config_path = args.conf
+
+ with open(config_path) as config_buffer:
+ config = json.loads(config_buffer.read())
+
+ ###############################
+ # Create the validation generator
+ ###############################
+ valid_ints, labels = parse_voc_annotation(
+ config['valid']['valid_annot_folder'],
+ config['valid']['valid_image_folder'],
+ config['valid']['cache_name'],
+ config['model']['labels']
+ )
+
+ labels = labels.keys() if len(config['model']['labels']) == 0 else config['model']['labels']
+ labels = sorted(labels)
+
+ valid_generator = BatchGenerator(
+ instances = valid_ints,
+ anchors = config['model']['anchors'],
+ labels = labels,
+ downsample = 32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image = 0,
+ batch_size = config['train']['batch_size'],
+ min_net_size = config['model']['min_input_size'],
+ max_net_size = config['model']['max_input_size'],
+ shuffle = True,
+ jitter = 0.0,
+ norm = normalize
+ )
+
+ ###############################
+ # Load the model and do evaluation
+ ###############################
+ os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']
+
+ infer_model = load_model(config['train']['saved_weights_name'])
+
+ # compute mAP for all the classes
+ average_precisions = evaluate(infer_model, valid_generator)
+
+ # print the score
+ for label, average_precision in average_precisions.items():
+ print(labels[label] + ': {:.4f}'.format(average_precision))
+ print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))
+
+
+if __name__ == '__main__':
+ argparser = argparse.ArgumentParser(description='Evaluate YOLO_v3 model on any dataset')
+ argparser.add_argument('-c', '--conf', help='path to configuration file')
+
+ args = argparser.parse_args()
+ _main_(args)
diff --git a/imageai_tf_deprecated/Detection/Custom/gen_anchors.py b/imageai_tf_deprecated/Detection/Custom/gen_anchors.py
new file mode 100644
index 00000000..693e6b21
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/gen_anchors.py
@@ -0,0 +1,122 @@
+import random
+import numpy as np
+
+from imageai.Detection.Custom.voc import parse_voc_annotation
+
+
+def IOU(ann, centroids):
+ w, h = ann
+ similarities = []
+
+ for centroid in centroids:
+ c_w, c_h = centroid
+
+ if c_w >= w and c_h >= h:
+ similarity = w*h/(c_w*c_h)
+ elif c_w >= w and c_h <= h:
+ similarity = w*c_h/(w*h + (c_w-w)*c_h)
+ elif c_w <= w and c_h >= h:
+ similarity = c_w*h/(w*h + c_w*(c_h-h))
+ else: #means both w,h are bigger than c_w and c_h respectively
+ similarity = (c_w*c_h)/(w*h)
+ similarities.append(similarity) # will become (k,) shape
+
+ return np.array(similarities)
+
+
+def avg_IOU(anns, centroids):
+ n,d = anns.shape
+ sum = 0.
+
+ for i in range(anns.shape[0]):
+ sum+= max(IOU(anns[i], centroids))
+
+ return sum/n
+
+
+def run_kmeans(ann_dims, anchor_num):
+ ann_num = ann_dims.shape[0]
+ iterations = 0
+ prev_assignments = np.ones(ann_num)*(-1)
+ iteration = 0
+ old_distances = np.zeros((ann_num, anchor_num))
+
+ indices = [random.randrange(ann_dims.shape[0]) for i in range(anchor_num)]
+ centroids = ann_dims[indices]
+ anchor_dim = ann_dims.shape[1]
+
+ while True:
+ distances = []
+ iteration += 1
+ for i in range(ann_num):
+ d = 1 - IOU(ann_dims[i], centroids)
+ distances.append(d)
+ distances = np.array(distances) # distances.shape = (ann_num, anchor_num)
+
+ #assign samples to centroids
+ assignments = np.argmin(distances,axis=1)
+
+ if (assignments == prev_assignments).all() :
+ return centroids
+
+ #calculate new centroids
+ centroid_sums=np.zeros((anchor_num, anchor_dim), np.float)
+ for i in range(ann_num):
+ centroid_sums[assignments[i]]+=ann_dims[i]
+ for j in range(anchor_num):
+ centroids[j] = centroid_sums[j]/(np.sum(assignments==j) + 1e-6)
+
+ prev_assignments = assignments.copy()
+ old_distances = distances.copy()
+
+
+def generateAnchors(train_annotation_folder, train_image_folder, train_cache_file, model_labels):
+
+ print("Generating anchor boxes for training images and annotation...")
+ num_anchors = 9
+
+ train_imgs, train_labels = parse_voc_annotation(
+ train_annotation_folder,
+ train_image_folder,
+ train_cache_file,
+ model_labels
+ )
+
+ # run k_mean to find the anchors
+ annotation_dims = []
+ for image in train_imgs:
+
+ for obj in image['object']:
+ relative_w = (float(obj['xmax']) - float(obj['xmin']))/image['width']
+ relative_h = (float(obj["ymax"]) - float(obj['ymin']))/image['height']
+ annotation_dims.append(tuple(map(float, (relative_w,relative_h))))
+
+ annotation_dims = np.array(annotation_dims)
+ centroids = run_kmeans(annotation_dims, num_anchors)
+
+ # write anchors to file
+ print('Average IOU for', num_anchors, 'anchors:', '%0.2f' % avg_IOU(annotation_dims, centroids))
+
+ anchors = centroids.copy()
+
+ widths = anchors[:, 0]
+ sorted_indices = np.argsort(widths)
+
+ anchor_array = []
+ reverse_anchor_array = []
+ out_string = ""
+ r = "anchors: ["
+ for i in sorted_indices:
+ anchor_array.append(int(anchors[i, 0] * 416))
+ anchor_array.append(int(anchors[i, 1] * 416))
+
+ out_string += str(int(anchors[i, 0] * 416)) + ',' + str(int(anchors[i, 1] * 416)) + ', '
+
+ reverse_anchor_array.append(anchor_array[12:18])
+ reverse_anchor_array.append(anchor_array[6:12])
+ reverse_anchor_array.append(anchor_array[0:6])
+
+ print("Anchor Boxes generated.")
+ return anchor_array, reverse_anchor_array
+
+
diff --git a/imageai_tf_deprecated/Detection/Custom/generator.py b/imageai_tf_deprecated/Detection/Custom/generator.py
new file mode 100644
index 00000000..ceb10c7c
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/generator.py
@@ -0,0 +1,232 @@
+import cv2
+import copy
+import numpy as np
+from tensorflow.keras.utils import Sequence
+from imageai.Detection.Custom.utils.bbox import BoundBox, bbox_iou
+from imageai.Detection.Custom.utils.image import apply_random_scale_and_crop, random_distort_image, random_flip, correct_bounding_boxes
+
+class BatchGenerator(Sequence):
+ def __init__(self,
+ instances,
+ anchors,
+ labels,
+ downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image=30,
+ batch_size=1,
+ min_net_size=320,
+ max_net_size=608,
+ shuffle=True,
+ jitter=True,
+ norm=None
+ ):
+ self.instances = instances
+ self.batch_size = batch_size
+ self.labels = labels
+ self.downsample = downsample
+ self.max_box_per_image = max_box_per_image
+ self.min_net_size = (min_net_size//self.downsample)*self.downsample
+ self.max_net_size = (max_net_size//self.downsample)*self.downsample
+ self.shuffle = shuffle
+ self.jitter = jitter
+ self.norm = norm
+ self.anchors = [BoundBox(0, 0, anchors[2*i], anchors[2*i+1]) for i in range(len(anchors)//2)]
+ self.net_h = 416
+ self.net_w = 416
+
+ if shuffle: np.random.shuffle(self.instances)
+
+ def __len__(self):
+ return int(np.ceil(float(len(self.instances))/self.batch_size))
+
+ def __getitem__(self, idx):
+ # get image input size, change every 10 batches
+ net_h, net_w = self._get_net_size(idx)
+ base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample
+
+ # determine the first and the last indices of the batch
+ l_bound = idx * self.batch_size
+ r_bound = (idx+1) * self.batch_size
+
+ if r_bound > len(self.instances):
+ r_bound = len(self.instances)
+ l_bound = r_bound - self.batch_size
+
+ x_batch = np.zeros((r_bound - l_bound, net_h, net_w, 3)) # input images
+ t_batch = np.zeros((r_bound - l_bound, 1, 1, 1, self.max_box_per_image, 4)) # list of groundtruth boxes
+
+ # initialize the inputs and the outputs
+ yolo_1 = np.zeros((r_bound - l_bound, 1*base_grid_h, 1*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 1
+ yolo_2 = np.zeros((r_bound - l_bound, 2*base_grid_h, 2*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 2
+ yolo_3 = np.zeros((r_bound - l_bound, 4*base_grid_h, 4*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 3
+ yolos = [yolo_3, yolo_2, yolo_1]
+
+ dummy_yolo_1 = np.zeros((r_bound - l_bound, 1))
+ dummy_yolo_2 = np.zeros_like(dummy_yolo_1)
+ dummy_yolo_3 = np.zeros_like(dummy_yolo_1)
+
+ instance_count = 0
+ true_box_index = 0
+
+ # do the logic to fill in the inputs and the output
+ for train_instance in self.instances[l_bound:r_bound]:
+ # augment input image and fix object's position and size
+ img, all_objs = self._aug_image(train_instance, net_h, net_w)
+
+ for obj in all_objs:
+ # find the best anchor box for this object
+ max_anchor = None
+ max_index = -1
+ max_iou = -1
+
+ shifted_box = BoundBox(0,
+ 0,
+ obj['xmax']-obj['xmin'],
+ obj['ymax']-obj['ymin'])
+
+ for i in range(len(self.anchors)):
+ anchor = self.anchors[i]
+ iou = bbox_iou(shifted_box, anchor)
+
+ if max_iou < iou:
+ max_anchor = anchor
+ max_index = i
+ max_iou = iou
+
+ # determine the yolo to be responsible for this bounding box
+ yolo = yolos[max_index//3]
+ grid_h, grid_w = yolo.shape[1:3]
+
+ # determine the position of the bounding box on the grid
+ center_x = .5*(obj['xmin'] + obj['xmax'])
+ center_x = center_x / float(net_w) * grid_w # sigma(t_x) + c_x
+ center_y = .5*(obj['ymin'] + obj['ymax'])
+ center_y = center_y / float(net_h) * grid_h # sigma(t_y) + c_y
+
+ # determine the sizes of the bounding box
+ w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax)) # t_w
+ h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax)) # t_h
+
+ box = [center_x, center_y, w, h]
+
+ # determine the index of the label
+ obj_indx = self.labels.index(obj['name'])
+
+ # determine the location of the cell responsible for this object
+ grid_x = int(np.floor(center_x))
+ grid_y = int(np.floor(center_y))
+
+ # assign ground truth x, y, w, h, confidence and class probs to y_batch
+ yolo[instance_count, grid_y, grid_x, max_index%3] = 0
+ yolo[instance_count, grid_y, grid_x, max_index%3, 0:4] = box
+ yolo[instance_count, grid_y, grid_x, max_index%3, 4 ] = 1.
+ yolo[instance_count, grid_y, grid_x, max_index%3, 5+obj_indx] = 1
+
+ # assign the true box to t_batch
+ true_box = [center_x, center_y, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin']]
+ t_batch[instance_count, 0, 0, 0, true_box_index] = true_box
+
+ true_box_index += 1
+ true_box_index = true_box_index % self.max_box_per_image
+
+ # assign input image to x_batch
+ if self.norm != None:
+ x_batch[instance_count] = self.norm(img)
+ else:
+ # plot image and bounding boxes for sanity check
+ for obj in all_objs:
+ cv2.rectangle(img, (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3)
+ cv2.putText(img, obj['name'],
+ (obj['xmin']+2, obj['ymin']+12),
+ 0, 1.2e-3 * img.shape[0],
+ (0,255,0), 2)
+
+ x_batch[instance_count] = img
+
+ # increase instance counter in the current batch
+ instance_count += 1
+
+ return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
+
+ def _get_net_size(self, idx):
+ if idx % 10 == 0:
+ net_size = self.downsample*np.random.randint(self.min_net_size/self.downsample, \
+ self.max_net_size/self.downsample+1)
+
+ self.net_h, self.net_w = net_size, net_size
+ return self.net_h, self.net_w
+
+ def _aug_image(self, instance, net_h, net_w):
+ image_name = instance['filename']
+ image = cv2.imread(image_name) # BGR image
+
+ if image is None:
+ print('Cannot find ', image_name)
+
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # RGB image
+
+ image_h, image_w, _ = image.shape
+
+ # determine the amount of scaling and cropping
+ dw = self.jitter * image_w
+ dh = self.jitter * image_h
+
+ new_ar = (image_w + np.random.uniform(-dw, dw)) / (image_h + np.random.uniform(-dh, dh))
+ scale = np.random.uniform(0.25, 2)
+
+ if new_ar < 1:
+ new_h = int(scale * net_h)
+ new_w = int(net_h * new_ar)
+ else:
+ new_w = int(scale * net_w)
+ new_h = int(net_w / new_ar)
+
+ dx = int(np.random.uniform(0, net_w - new_w))
+ dy = int(np.random.uniform(0, net_h - new_h))
+
+ # apply scaling and cropping
+ im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy)
+
+ # randomly distort hsv space
+ im_sized = random_distort_image(im_sized)
+
+ # randomly flip
+ flip = np.random.randint(2)
+ im_sized = random_flip(im_sized, flip)
+
+ # correct the size and pos of bounding boxes
+ all_objs = correct_bounding_boxes(instance['object'], new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h)
+
+ return im_sized, all_objs
+
+ def on_epoch_end(self):
+ if self.shuffle:
+ np.random.shuffle(self.instances)
+
+ def num_classes(self):
+ return len(self.labels)
+
+ def size(self):
+ return len(self.instances)
+
+ def get_anchors(self):
+ anchors = []
+
+ for anchor in self.anchors:
+ anchors += [anchor.xmax, anchor.ymax]
+
+ return anchors
+
+ def load_annotation(self, i):
+ annots = []
+
+ for obj in self.instances[i]['object']:
+ annot = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], self.labels.index(obj['name'])]
+ annots += [annot]
+
+ if len(annots) == 0:
+ annots = [[]]
+
+ return np.array(annots)
+
+ def load_image(self, i):
+ return cv2.imread(self.instances[i]['filename']) # BGR image
diff --git a/imageai/Detection/keras_retinanet/utils/__init__.py b/imageai_tf_deprecated/Detection/Custom/utils/__init__.py
similarity index 100%
rename from imageai/Detection/keras_retinanet/utils/__init__.py
rename to imageai_tf_deprecated/Detection/Custom/utils/__init__.py
diff --git a/imageai_tf_deprecated/Detection/Custom/utils/bbox.py b/imageai_tf_deprecated/Detection/Custom/utils/bbox.py
new file mode 100644
index 00000000..630d4c52
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/utils/bbox.py
@@ -0,0 +1,92 @@
+import numpy as np
+import os
+import cv2
+from .colors import get_color
+
+class BoundBox:
+ def __init__(self, xmin, ymin, xmax, ymax, c = None, classes = None):
+ self.xmin = xmin
+ self.ymin = ymin
+ self.xmax = xmax
+ self.ymax = ymax
+
+ self.c = c
+ self.classes = classes
+
+ self.label = -1
+ self.score = -1
+
+ def get_label(self):
+ if self.label == -1:
+ self.label = np.argmax(self.classes)
+
+ return self.label
+
+ def get_score(self):
+ if self.score == -1:
+ self.score = self.classes[self.get_label()]
+
+ return self.score
+
+def _interval_overlap(interval_a, interval_b):
+ x1, x2 = interval_a
+ x3, x4 = interval_b
+
+ if x3 < x1:
+ if x4 < x1:
+ return 0
+ else:
+ return min(x2,x4) - x1
+ else:
+ if x2 < x3:
+ return 0
+ else:
+ return min(x2,x4) - x3
+
+def bbox_iou(box1, box2):
+ intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
+ intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
+
+ intersect = intersect_w * intersect_h
+
+ w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
+ w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
+
+ union = w1*h1 + w2*h2 - intersect
+
+ if(union <= 0):
+ union = 1
+
+ return float(intersect) / float(union)
+
+def draw_boxes(image, boxes, labels, obj_thresh, quiet=True):
+ for box in boxes:
+ label_str = ''
+ label = -1
+
+ for i in range(len(labels)):
+ if box.classes[i] > obj_thresh:
+ if label_str != '': label_str += ', '
+ label_str += (labels[i] + ' ' + str(round(box.get_score()*100, 2)) + '%')
+ label = i
+ if not quiet: print(label_str)
+
+ if label >= 0:
+ text_size = cv2.getTextSize(label_str, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-3 * image.shape[0], 5)
+ width, height = text_size[0][0], text_size[0][1]
+ region = np.array([[box.xmin-3, box.ymin],
+ [box.xmin-3, box.ymin-height-26],
+ [box.xmin+width+13, box.ymin-height-26],
+ [box.xmin+width+13, box.ymin]], dtype='int32')
+
+ cv2.rectangle(img=image, pt1=(box.xmin,box.ymin), pt2=(box.xmax,box.ymax), color=get_color(label), thickness=5)
+ cv2.fillPoly(img=image, pts=[region], color=get_color(label))
+ cv2.putText(img=image,
+ text=label_str,
+ org=(box.xmin+13, box.ymin - 13),
+ fontFace=cv2.FONT_HERSHEY_SIMPLEX,
+ fontScale=1e-3 * image.shape[0],
+ color=(0,0,0),
+ thickness=2)
+
+ return image
\ No newline at end of file
diff --git a/imageai_tf_deprecated/Detection/Custom/utils/colors.py b/imageai_tf_deprecated/Detection/Custom/utils/colors.py
new file mode 100644
index 00000000..2983a98a
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/utils/colors.py
@@ -0,0 +1,96 @@
+def get_color(label):
+ """ Return a color from a set of predefined colors. Contains 80 colors in total.
+ code originally from https://github.com/fizyr/keras-retinanet/
+ Args
+ label: The label to get the color for.
+ Returns
+ A list of three values representing a RGB color.
+ """
+ if label < len(colors):
+ return colors[label]
+ else:
+ print('Label {} has no color, returning default.'.format(label))
+ return (0, 255, 0)
+
+colors = [
+ [31 , 0 , 255] ,
+ [0 , 159 , 255] ,
+ [255 , 95 , 0] ,
+ [255 , 19 , 0] ,
+ [255 , 0 , 0] ,
+ [255 , 38 , 0] ,
+ [0 , 255 , 25] ,
+ [255 , 0 , 133] ,
+ [255 , 172 , 0] ,
+ [108 , 0 , 255] ,
+ [0 , 82 , 255] ,
+ [0 , 255 , 6] ,
+ [255 , 0 , 152] ,
+ [223 , 0 , 255] ,
+ [12 , 0 , 255] ,
+ [0 , 255 , 178] ,
+ [108 , 255 , 0] ,
+ [184 , 0 , 255] ,
+ [255 , 0 , 76] ,
+ [146 , 255 , 0] ,
+ [51 , 0 , 255] ,
+ [0 , 197 , 255] ,
+ [255 , 248 , 0] ,
+ [255 , 0 , 19] ,
+ [255 , 0 , 38] ,
+ [89 , 255 , 0] ,
+ [127 , 255 , 0] ,
+ [255 , 153 , 0] ,
+ [0 , 255 , 255] ,
+ [0 , 255 , 216] ,
+ [0 , 255 , 121] ,
+ [255 , 0 , 248] ,
+ [70 , 0 , 255] ,
+ [0 , 255 , 159] ,
+ [0 , 216 , 255] ,
+ [0 , 6 , 255] ,
+ [0 , 63 , 255] ,
+ [31 , 255 , 0] ,
+ [255 , 57 , 0] ,
+ [255 , 0 , 210] ,
+ [0 , 255 , 102] ,
+ [242 , 255 , 0] ,
+ [255 , 191 , 0] ,
+ [0 , 255 , 63] ,
+ [255 , 0 , 95] ,
+ [146 , 0 , 255] ,
+ [184 , 255 , 0] ,
+ [255 , 114 , 0] ,
+ [0 , 255 , 235] ,
+ [255 , 229 , 0] ,
+ [0 , 178 , 255] ,
+ [255 , 0 , 114] ,
+ [255 , 0 , 57] ,
+ [0 , 140 , 255] ,
+ [0 , 121 , 255] ,
+ [12 , 255 , 0] ,
+ [255 , 210 , 0] ,
+ [0 , 255 , 44] ,
+ [165 , 255 , 0] ,
+ [0 , 25 , 255] ,
+ [0 , 255 , 140] ,
+ [0 , 101 , 255] ,
+ [0 , 255 , 82] ,
+ [223 , 255 , 0] ,
+ [242 , 0 , 255] ,
+ [89 , 0 , 255] ,
+ [165 , 0 , 255] ,
+ [70 , 255 , 0] ,
+ [255 , 0 , 172] ,
+ [255 , 76 , 0] ,
+ [203 , 255 , 0] ,
+ [204 , 0 , 255] ,
+ [255 , 0 , 229] ,
+ [255 , 133 , 0] ,
+ [127 , 0 , 255] ,
+ [0 , 235 , 255] ,
+ [0 , 255 , 197] ,
+ [255 , 0 , 191] ,
+ [0 , 44 , 255] ,
+ [50 , 255 , 0]
+]
diff --git a/imageai_tf_deprecated/Detection/Custom/utils/image.py b/imageai_tf_deprecated/Detection/Custom/utils/image.py
new file mode 100644
index 00000000..600266ac
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/utils/image.py
@@ -0,0 +1,99 @@
+import cv2
+import numpy as np
+import copy
+
+
+def _rand_scale(scale):
+ scale = np.random.uniform(1, scale)
+ return scale if np.random.randint(2) == 0 else 1./scale
+
+
+def _constrain(min_v, max_v, value):
+
+ if value < min_v:
+ return min_v
+
+ if value > max_v:
+ return max_v
+
+ return value
+
+
+def random_flip(image, flip):
+ if flip == 1:
+ return cv2.flip(image, 1)
+ return image
+
+
+def correct_bounding_boxes(boxes, new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h):
+ boxes = copy.deepcopy(boxes)
+
+ # randomize boxes' order
+ np.random.shuffle(boxes)
+
+ # correct sizes and positions
+ sx, sy = float(new_w)/image_w, float(new_h)/image_h
+ zero_boxes = []
+
+ for i in range(len(boxes)):
+ boxes[i]['xmin'] = int(_constrain(0, net_w, boxes[i]['xmin']*sx + dx))
+ boxes[i]['xmax'] = int(_constrain(0, net_w, boxes[i]['xmax']*sx + dx))
+ boxes[i]['ymin'] = int(_constrain(0, net_h, boxes[i]['ymin']*sy + dy))
+ boxes[i]['ymax'] = int(_constrain(0, net_h, boxes[i]['ymax']*sy + dy))
+
+ if boxes[i]['xmax'] <= boxes[i]['xmin'] or boxes[i]['ymax'] <= boxes[i]['ymin']:
+ zero_boxes += [i]
+ continue
+
+ if flip == 1:
+ swap = boxes[i]['xmin']
+ boxes[i]['xmin'] = net_w - boxes[i]['xmax']
+ boxes[i]['xmax'] = net_w - swap
+
+ boxes = [boxes[i] for i in range(len(boxes)) if i not in zero_boxes]
+
+ return boxes
+
+
+def random_distort_image(image, hue=18, saturation=1.5, exposure=1.5):
+ # determine scale factors
+ dhue = np.random.uniform(-hue, hue)
+ dsat = _rand_scale(saturation)
+ dexp = _rand_scale(exposure)
+
+ # convert RGB space to HSV space
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype('float')
+
+ # change satuation and exposure
+ image[:, :, 1] *= dsat
+ image[:, :, 2] *= dexp
+
+ # change hue
+ image[:, :, 0] += dhue
+ image[:, :, 0] -= (image[:, :, 0] > 180) * 180
+ image[:, :, 0] += (image[:, :, 0] < 0) * 180
+
+ # convert back to RGB from HSV
+ return cv2.cvtColor(image.astype('uint8'), cv2.COLOR_HSV2RGB)
+
+
+def apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy):
+
+ im_sized = cv2.resize(image, (new_w, new_h))
+
+ if dx > 0:
+ im_sized = np.pad(im_sized, ((0, 0), (dx, 0), (0, 0)), mode='constant', constant_values=127)
+ else:
+ im_sized = im_sized[:, -dx:, :]
+ if (new_w + dx) < net_w:
+ im_sized = np.pad(im_sized, ((0, 0), (0, net_w - (new_w+dx)), (0, 0)), mode='constant', constant_values=127)
+
+ if dy > 0:
+ im_sized = np.pad(im_sized, ((dy, 0), (0, 0), (0, 0)), mode='constant', constant_values=127)
+ else:
+ im_sized = im_sized[-dy:, :, :]
+
+ if (new_h + dy) < net_h:
+ im_sized = np.pad(im_sized, ((0, net_h - (new_h+dy)), (0, 0), (0, 0)), mode='constant', constant_values=127)
+
+ return im_sized[:net_h, :net_w, :]
diff --git a/imageai_tf_deprecated/Detection/Custom/utils/multi_gpu_model.py b/imageai_tf_deprecated/Detection/Custom/utils/multi_gpu_model.py
new file mode 100644
index 00000000..3a5e3fc1
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/utils/multi_gpu_model.py
@@ -0,0 +1,60 @@
+from keras.layers import Lambda, concatenate
+from keras.models import Model
+import tensorflow as tf
+
+
+def multi_gpu_model(model, gpus):
+ if isinstance(gpus, (list, tuple)):
+ num_gpus = len(gpus)
+ target_gpu_ids = gpus
+ else:
+ num_gpus = gpus
+ target_gpu_ids = range(num_gpus)
+
+ def get_slice(data, i, parts):
+ shape = tf.shape(data)
+ batch_size = shape[:1]
+ input_shape = shape[1:]
+ step = batch_size // parts
+ if i == num_gpus - 1:
+ size = batch_size - step * i
+ else:
+ size = step
+ size = tf.concat([size, input_shape], axis=0)
+ stride = tf.concat([step, input_shape * 0], axis=0)
+ start = stride * i
+ return tf.slice(data, start, size)
+
+ all_outputs = []
+ for i in range(len(model.outputs)):
+ all_outputs.append([])
+
+ # Place a copy of the model on each GPU,
+ # each getting a slice of the inputs.
+ for i, gpu_id in enumerate(target_gpu_ids):
+ with tf.device('/gpu:%d' % gpu_id):
+ with tf.name_scope('replica_%d' % gpu_id):
+ inputs = []
+ # Retrieve a slice of the input.
+ for x in model.inputs:
+ input_shape = tuple(x.get_shape().as_list())[1:]
+ slice_i = Lambda(get_slice, output_shape=input_shape,
+ arguments={'i': i, 'parts': num_gpus})(x)
+ inputs.append(slice_i)
+
+ # Apply model on slice
+ # (creating a model replica on the target device).
+ outputs = model(inputs)
+ if not isinstance(outputs, list):
+ outputs = [outputs]
+
+ # Save the outputs for merging back together later.
+ for o in range(len(outputs)):
+ all_outputs[o].append(outputs[o])
+
+ # Merge outputs on CPU.
+ with tf.device('/cpu:0'):
+ merged = []
+ for name, outputs in zip(model.output_names, all_outputs):
+ merged.append(concatenate(outputs, axis=0, name=name))
+ return Model(model.inputs, merged)
diff --git a/imageai_tf_deprecated/Detection/Custom/utils/utils.py b/imageai_tf_deprecated/Detection/Custom/utils/utils.py
new file mode 100644
index 00000000..e9dfac18
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/utils/utils.py
@@ -0,0 +1,336 @@
+import cv2
+import numpy as np
+import os
+from .bbox import BoundBox, bbox_iou
+from scipy.special import expit
+
+
+def _sigmoid(x):
+ return expit(x)
+
+
+def makedirs(path):
+ try:
+ os.makedirs(path)
+ except OSError:
+ if not os.path.isdir(path):
+ raise
+
+
+def evaluate(model,
+ generator,
+ iou_threshold,
+ obj_thresh,
+ nms_thresh,
+ net_h=416,
+ net_w=416,
+ save_path=None):
+ """ Evaluate a given dataset using a given model.
+ code originally from https://github.com/fizyr/keras-retinanet
+
+ # Arguments
+ model : The model to evaluate.
+ generator : The generator that represents the dataset to evaluate.
+ iou_threshold : The threshold used to consider when a detection is positive or negative.
+ obj_thresh : The threshold used to distinguish between object and non-object
+ nms_thresh : The threshold used to determine whether two detections are duplicates
+ net_h : The height of the input image to the model, higher value results in better accuracy
+ net_w : The width of the input image to the model
+ save_path : The path to save images with visualized detections to.
+ # Returns
+ A dict mapping class names to mAP scores.
+ """
+ # gather all detections and annotations
+ all_detections = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
+ all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
+
+ for i in range(generator.size()):
+ raw_image = [generator.load_image(i)]
+
+ # make the boxes and the labels
+ pred_boxes = get_yolo_boxes(model, raw_image, net_h, net_w, generator.get_anchors(), obj_thresh, nms_thresh)[0]
+
+ score = np.array([box.get_score() for box in pred_boxes])
+ pred_labels = np.array([box.label for box in pred_boxes])
+
+ if len(pred_boxes) > 0:
+ pred_boxes = np.array([[box.xmin, box.ymin, box.xmax, box.ymax, box.get_score()] for box in pred_boxes])
+ else:
+ pred_boxes = np.array([[]])
+
+ # sort the boxes and the labels according to scores
+ score_sort = np.argsort(-score)
+ pred_labels = pred_labels[score_sort]
+ pred_boxes = pred_boxes[score_sort]
+
+ # copy detections to all_detections
+ for label in range(generator.num_classes()):
+ all_detections[i][label] = pred_boxes[pred_labels == label, :]
+
+ annotations = generator.load_annotation(i)
+
+ # copy detections to all_annotations
+ for label in range(generator.num_classes()):
+ all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()
+
+ # compute mAP by comparing all detections and all annotations
+ average_precisions = {}
+
+ for label in range(generator.num_classes()):
+ false_positives = np.zeros((0,))
+ true_positives = np.zeros((0,))
+ scores = np.zeros((0,))
+ num_annotations = 0.0
+
+ for i in range(generator.size()):
+ detections = all_detections[i][label]
+ annotations = all_annotations[i][label]
+ num_annotations += annotations.shape[0]
+ detected_annotations = []
+
+ for d in detections:
+ scores = np.append(scores, d[4])
+
+ if annotations.shape[0] == 0:
+ false_positives = np.append(false_positives, 1)
+ true_positives = np.append(true_positives, 0)
+ continue
+
+ overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
+ assigned_annotation = np.argmax(overlaps, axis=1)
+ max_overlap = overlaps[0, assigned_annotation]
+
+ if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
+ false_positives = np.append(false_positives, 0)
+ true_positives = np.append(true_positives, 1)
+ detected_annotations.append(assigned_annotation)
+ else:
+ false_positives = np.append(false_positives, 1)
+ true_positives = np.append(true_positives, 0)
+
+ # no annotations -> AP for this class is 0 (is this correct?)
+ if num_annotations == 0:
+ average_precisions[label] = 0
+ continue
+
+ # sort by score
+ indices = np.argsort(-scores)
+ false_positives = false_positives[indices]
+ true_positives = true_positives[indices]
+
+ # compute false positives and true positives
+ false_positives = np.cumsum(false_positives)
+ true_positives = np.cumsum(true_positives)
+
+ # compute recall and precision
+ recall = true_positives / num_annotations
+ precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
+
+ # compute average precision
+ average_precision = compute_ap(recall, precision)
+ average_precisions[label] = average_precision
+
+ return average_precisions
+
+
+def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
+ if (float(net_w)/image_w) < (float(net_h)/image_h):
+ new_w = net_w
+ new_h = (image_h*net_w)/image_w
+ else:
+ new_h = net_w
+ new_w = (image_w*net_h)/image_h
+
+ for i in range(len(boxes)):
+ x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
+ y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
+
+ boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
+ boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
+ boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
+ boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)
+
+
+def do_nms(boxes, nms_thresh):
+ if len(boxes) > 0:
+ nb_class = len(boxes[0].classes)
+ else:
+ return
+
+ for c in range(nb_class):
+ sorted_indices = np.argsort([-box.classes[c] for box in boxes])
+
+ for i in range(len(sorted_indices)):
+ index_i = sorted_indices[i]
+
+ if boxes[index_i].classes[c] == 0: continue
+
+ for j in range(i+1, len(sorted_indices)):
+ index_j = sorted_indices[j]
+
+ if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
+ boxes[index_j].classes[c] = 0
+
+
+def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
+ grid_h, grid_w = netout.shape[:2]
+ nb_box = 3
+ netout = netout.reshape((grid_h, grid_w, nb_box, -1))
+ nb_class = netout.shape[-1] - 5
+
+ boxes = []
+
+ netout[..., :2] = _sigmoid(netout[..., :2])
+ netout[..., 4] = _sigmoid(netout[..., 4])
+ netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
+ netout[..., 5:] *= netout[..., 5:] > obj_thresh
+
+ for i in range(grid_h*grid_w):
+ row = i // grid_w
+ col = i % grid_w
+
+ for b in range(nb_box):
+ # 4th element is objectness score
+ objectness = netout[row, col, b, 4]
+
+ if objectness <= obj_thresh:
+ continue
+
+ # first 4 elements are x, y, w, and h
+ x, y, w, h = netout[row, col, b, :4]
+
+ x = (col + x) / grid_w # center position, unit: image width
+ y = (row + y) / grid_h # center position, unit: image height
+ w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
+ h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
+
+ # last elements are class probabilities
+ classes = netout[row, col, b, 5:]
+
+ box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
+
+ boxes.append(box)
+
+ return boxes
+
+
+def preprocess_input(image, net_h, net_w):
+ new_h, new_w, _ = image.shape
+
+ # determine the new size of the image
+ if (float(net_w)/new_w) < (float(net_h)/new_h):
+ new_h = (new_h * net_w)//new_w
+ new_w = net_w
+ else:
+ new_w = (new_w * net_h)//new_h
+ new_h = net_h
+
+ # resize the image to the new size
+ resized = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)/255., (new_w, new_h))
+
+ # embed the image into the standard letter box
+ new_image = np.ones((net_h, net_w, 3)) * 0.5
+ new_image[(net_h-new_h)//2:(net_h+new_h)//2, (net_w-new_w)//2:(net_w+new_w)//2, :] = resized
+ new_image = np.expand_dims(new_image, 0)
+
+ return new_image
+
+
+def normalize(image):
+ return image/255.
+
+
+def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh):
+ image_h, image_w, _ = images[0].shape
+ nb_images = len(images)
+ batch_input = np.zeros((nb_images, net_h, net_w, 3))
+
+ # preprocess the input
+ for i in range(nb_images):
+ batch_input[i] = preprocess_input(images[i], net_h, net_w)
+
+ # run the prediction
+ batch_output = model.predict_on_batch(batch_input)
+ batch_boxes = [None]*nb_images
+
+ for i in range(nb_images):
+ yolos = [batch_output[0][i], batch_output[1][i], batch_output[2][i]]
+ boxes = []
+
+ # decode the output of the network
+ for j in range(len(yolos)):
+ yolo_anchors = anchors[(2-j)*6:(3-j)*6] # config['model']['anchors']
+ boxes += decode_netout(yolos[j], yolo_anchors, obj_thresh, net_h, net_w)
+
+ # correct the sizes of the bounding boxes
+ correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)
+
+ # suppress non-maximal boxes
+ do_nms(boxes, nms_thresh)
+
+ batch_boxes[i] = boxes
+
+ return batch_boxes
+
+
+def compute_overlap(a, b):
+ """
+ Code originally from https://github.com/rbgirshick/py-faster-rcnn.
+ Parameters
+ ----------
+ a: (N, 4) ndarray of float
+ b: (K, 4) ndarray of float
+ Returns
+ -------
+ overlaps: (N, K) ndarray of overlap between boxes and query_boxes
+ """
+ area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
+
+ iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
+ ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])
+
+ iw = np.maximum(iw, 0)
+ ih = np.maximum(ih, 0)
+
+ ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih
+
+ ua = np.maximum(ua, np.finfo(float).eps)
+
+ intersection = iw * ih
+
+ return intersection / ua
+
+
+def compute_ap(recall, precision):
+ """ Compute the average precision, given the recall and precision curves.
+ Code originally from https://github.com/rbgirshick/py-faster-rcnn.
+
+ # Arguments
+ recall: The recall curve (list).
+ precision: The precision curve (list).
+ # Returns
+ The average precision as computed in py-faster-rcnn.
+ """
+ # correct AP calculation
+ # first append sentinel values at the end
+ mrec = np.concatenate(([0.], recall, [1.]))
+ mpre = np.concatenate(([0.], precision, [0.]))
+
+ # compute the precision envelope
+ for i in range(mpre.size - 1, 0, -1):
+ mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
+
+ # to calculate area under PR curve, look for points
+ # where X axis (recall) changes value
+ i = np.where(mrec[1:] != mrec[:-1])[0]
+
+ # and sum (\Delta recall) * prec
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
+ return ap
+
+
+def _softmax(x, axis=-1):
+ x = x - np.amax(x, axis, keepdims=True)
+ e_x = np.exp(x)
+
+ return e_x / e_x.sum(axis, keepdims=True)
diff --git a/imageai_tf_deprecated/Detection/Custom/voc.py b/imageai_tf_deprecated/Detection/Custom/voc.py
new file mode 100644
index 00000000..52958e2a
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/Custom/voc.py
@@ -0,0 +1,67 @@
+import os
+import xml.etree.ElementTree as ET
+import pickle
+
+
+def parse_voc_annotation(ann_dir, img_dir, cache_name, labels=[]):
+ if os.path.exists(cache_name):
+ with open(cache_name, 'rb') as handle:
+ cache = pickle.load(handle)
+ all_insts, seen_labels = cache['all_insts'], cache['seen_labels']
+ else:
+ all_insts = list()
+ seen_labels = dict()
+
+ for ann in sorted(os.listdir(ann_dir)):
+ img = {'object': list()}
+
+ try:
+ tree = ET.parse(os.path.join(ann_dir, ann))
+ except Exception as e:
+ print(e)
+ print('Ignore this bad annotation: ' + os.path.join(ann_dir, ann))
+ continue
+
+ for elem in tree.iter():
+ if 'filename' in elem.tag:
+ img['filename'] = os.path.join(img_dir, elem.text)
+ if 'width' in elem.tag:
+ img['width'] = int(elem.text)
+ if 'height' in elem.tag:
+ img['height'] = int(elem.text)
+ if 'object' in elem.tag or 'part' in elem.tag:
+ obj = {}
+
+ for attr in list(elem):
+ if 'name' in attr.tag:
+ obj['name'] = attr.text
+
+ if obj['name'] in seen_labels:
+ seen_labels[obj['name']] += 1
+ else:
+ seen_labels[obj['name']] = 1
+
+ if len(labels) > 0 and obj['name'] not in labels:
+ break
+ else:
+ img['object'] += [obj]
+
+ if 'bndbox' in attr.tag:
+ for dim in list(attr):
+ if 'xmin' in dim.tag:
+ obj['xmin'] = int(round(float(dim.text)))
+ if 'ymin' in dim.tag:
+ obj['ymin'] = int(round(float(dim.text)))
+ if 'xmax' in dim.tag:
+ obj['xmax'] = int(round(float(dim.text)))
+ if 'ymax' in dim.tag:
+ obj['ymax'] = int(round(float(dim.text)))
+
+ if len(img['object']) > 0:
+ all_insts += [img]
+
+ cache = {'all_insts': all_insts, 'seen_labels': seen_labels}
+ with open(cache_name, 'wb') as handle:
+ pickle.dump(cache, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+ return all_insts, seen_labels
diff --git a/imageai_tf_deprecated/Detection/README.md b/imageai_tf_deprecated/Detection/README.md
new file mode 100644
index 00000000..9516e331
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/README.md
@@ -0,0 +1,299 @@
+# ImageAI : Object Detection
+
+A **DeepQuest AI** project [https://deepquestai.com](https://deepquestai.com)
+
+### TABLE OF CONTENTS
+
+- :white_square_button: First Object Detection
+- :white_square_button: Object Detection, Extraction and Fine-tune
+- :white_square_button: Custom Object Detection
+- :white_square_button: Detection Speed
+- :white_square_button: Hiding/Showing Object Name and Probability
+- :white_square_button: Image Input & Output Types
+- :white_square_button: Documentation
+
+
+ImageAI provides very convenient and powerful methods to perform object detection on images and extract each object from the image. The object detection class supports RetinaNet, YOLOv3 and TinyYOLOv3. To start performing object detection, you must download the RetinaNet, YOLOv3 or TinyYOLOv3 object detection model via the links below:
+* **[RetinaNet](https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_coco_best_v2.1.0.h5)** _(Size = 145 mb, high performance and accuracy, with longer detection time)_
+* **[YOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo.h5)** _(Size = 237 mb, moderate performance and accuracy, with a moderate detection time)_
+* **[TinyYOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo-tiny.h5)** _(Size = 34 mb, optimized for speed and moderate performance, with fast detection time)_
+
+
+ Once you download the object detection model file, you should copy the model file to your project folder where your .py files will be.
+ Then create a python file and give it a name; an example is FirstObjectDetection.py. Then write the code below into the python file:
+
+### FirstObjectDetection.py
+
+
+```python
+from imageai.Detection import ObjectDetection
+import os
+
+execution_path = os.getcwd()
+
+detector = ObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.loadModel()
+detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image2.jpg"), output_image_path=os.path.join(execution_path , "image2new.jpg"), minimum_percentage_probability=30)
+
+for eachObject in detections:
+ print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
+ print("--------------------------------")
+```
+
+Sample Result:
+Input Image
+
+Output Image
+
+
+```
+laptop : 87.32235431671143 : (306, 238, 390, 284)
+--------------------------------
+laptop : 96.86298966407776 : (121, 209, 258, 293)
+--------------------------------
+laptop : 98.6301600933075 : (279, 321, 401, 425)
+--------------------------------
+laptop : 99.78572130203247 : (451, 204, 579, 285)
+--------------------------------
+bed : 94.02391314506531 : (23, 205, 708, 553)
+--------------------------------
+apple : 48.03136885166168 : (527, 343, 557, 364)
+--------------------------------
+cup : 34.09906327724457 : (462, 347, 496, 379)
+--------------------------------
+cup : 44.65090036392212 : (582, 342, 618, 386)
+--------------------------------
+person : 57.70219564437866 : (27, 311, 341, 437)
+--------------------------------
+person : 85.26121377944946 : (304, 173, 387, 253)
+--------------------------------
+person : 96.33603692054749 : (415, 130, 538, 266)
+--------------------------------
+person : 96.95255160331726 : (174, 108, 278, 269)
+--------------------------------
+```
+
+Let us make a breakdown of the object detection code that we used above.
+
+```python
+from imageai.Detection import ObjectDetection
+import os
+
+execution_path = os.getcwd()
+```
+
+ In the 3 lines above, we import the **ImageAI object detection** class in the first line, import the `os` module in the second line and obtain the path to the folder where our python file runs.
+
+```python
+detector = ObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.loadModel()
+```
+
+In the 4 lines above, we created a new instance of the `ObjectDetection` class in the first line, set the model type to YOLOv3 in the second line, set the model path to the YOLOv3 model file we downloaded and copied to the python file folder in the third line and load the model in the fourth line.
+
+```python
+detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image2.jpg"), output_image_path=os.path.join(execution_path , "image2new.jpg"))
+
+for eachObject in detections:
+ print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
+ print("--------------------------------")
+```
+
+In the 2 lines above, we ran the `detectObjectsFromImage()` function and parse in the path to our image, and the path to the new image which the function will save. Then the function returns an array of dictionaries with each dictionary corresponding to the number of objects detected in the image. Each dictionary has the properties `name` (name of the object), `percentage_probability` (percentage probability of the detection) and `box_points` (the x1,y1,x2 and y2 coordinates of the bounding box of the object).
+
+Should you want to use the RetinaNet which is appropriate for high-performance and high-accuracy demanding detection tasks, you will download the RetinaNet model file from the links above, copy it to your python file's folder, set the model type and model path in your python code as seen below:
+
+```python
+detector = ObjectDetection()
+detector.setModelTypeAsRetinaNet()
+detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
+detector.loadModel()
+```
+
+However, if you desire TinyYOLOv3 which is optimized for speed and embedded devices, you will download the TinyYOLOv3 model file from the links above, copy it to your python file's folder, set the model type and model path in your python code as seen below:
+
+```python
+detector = ObjectDetection()
+detector.setModelTypeAsTinyYOLOv3()
+detector.setModelPath( os.path.join(execution_path , "yolo-tiny.h5"))
+detector.loadModel()
+```
+
+## Object Detection, Extraction and Fine-tune
+
+
+In the examples we used above, we ran the object detection on an image and it returned the detected objects in an array as well as save a new image with rectangular markers drawn on each object. In our next examples, we will be able to extract each object from the input image
+ and save it independently.
+
+In the example code below, which is nearly identical to the previous object detection code, we will save each object detected as a separate image.
+
+```python
+from imageai.Detection import ObjectDetection
+import os
+
+execution_path = os.getcwd()
+
+detector = ObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.loadModel()
+
+detections, objects_path = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new.jpg"), minimum_percentage_probability=30, extract_detected_objects=True)
+
+for eachObject, eachObjectPath in zip(detections, objects_path):
+ print(eachObject["name"] , " : " , eachObject["percentage_probability"], " : ", eachObject["box_points"] )
+ print("Object's image saved in " + eachObjectPath)
+ print("--------------------------------")
+```
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Let us review the part of the code that perform the object detection and extract the images:
+
+```python
+detections, objects_path = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new.jpg"), minimum_percentage_probability=30, extract_detected_objects=True)
+
+for eachObject, eachObjectPath in zip(detections, objects_path):
+ print(eachObject["name"] , " : " , eachObject["percentage_probability"], " : ", eachObject["box_points"] )
+ print("Object's image saved in " + eachObjectPath)
+ print("--------------------------------")
+```
+
+In the above lines, we called the `detectObjectsFromImage()` function, passed in the input image path, the output image path, and an extra parameter `extract_detected_objects=True`. This parameter states that the function should extract each object detected from the image and save it as a separate image. The parameter is false by default. Once set to `true`, the function will create a directory which is the **output image path + "-objects"**. Then it saves all the extracted images into this new directory, with each image's name being the **detected object name + "-" + a number** which corresponds to the order in which the objects were detected.
+
+This new parameter we set to extract and save detected objects as an image will make the function to return 2 values. The first is the array of dictionaries with each dictionary corresponding to a detected object. The second is an array of the paths to the saved images of each object detected and extracted, and they are arranged in order at which the objects are in the first array.
+
+
+**And one important feature you need to know!** You will recall that the percentage probability
+ for each detected object is sent back by the `detectObjectsFromImage()` function. The function has a parameter `minimum_percentage_probability`, whose default value is `50` (value ranges between 0 - 100), but it is set to 30 in this example. That means the function will only return a detected object if its percentage probability is **30 or above**. The value was kept at this number to ensure the integrity of the detection results. You can fine-tune the object detection by setting **minimum_percentage_probability** to a smaller value to detect more objects or a higher value to detect fewer objects.
+
+
+## Custom Object Detection
+
+
+The object detection model (**RetinaNet**) supported by **ImageAI** can detect 80 different types of objects. They include:
+```
+person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop_sign,
+parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,
+giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,
+sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket,
+bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,
+broccoli, carrot, hot dog, pizza, donut, cake, chair, couch, potted plant, bed,
+dining table, toilet, tv, laptop, mouse, remote, keyboard, cell phone, microwave, oven,
+toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair dryer, toothbrush.
+```
+
+Interestingly, **ImageAI** allows you to perform detection for one or more of the items above. That means you can
+ customize the type of object(s) you want to be detected in the image. Let's take a look at the code below:
+
+```python
+from imageai.Detection import ObjectDetection
+import os
+
+execution_path = os.getcwd()
+
+detector = ObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
+detector.loadModel()
+
+custom_objects = detector.CustomObjects(car=True, motorcycle=True)
+detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3custom.jpg"), minimum_percentage_probability=30)
+
+for eachObject in detections:
+ print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
+ print("--------------------------------")
+```
+
+
+
+
+Let us take a look at the part of the code that made this possible.
+```python
+custom_objects = detector.CustomObjects(car=True, motorcycle=True)
+detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3custom.jpg"), minimum_percentage_probability=30)
+```
+
+In the above code, after loading the model (can be done before loading the model as well), we defined a new variable
+`custom_objects = detector.CustomObjects()`, in which we set its car and motorcycle properties equal to **True**.
+This is to tell the model to detect only the object we set to True. Then we call the `detector.detectCustomObjectsFromImage()`
+which is the function that allows us to perform detection of custom objects. Then we will set the `custom_objects` value
+ to the custom objects variable we defined.
+
+
+## Detection Speed
+
+
+**ImageAI** now provides detection speeds for all object detection tasks. The detection speeds allow you to reduce
+ the time of detection at a rate between 20% - 80%, and yet having just slight changes but accurate detection
+results. Coupled with lowering the `minimum_percentage_probability` parameter, detections can match the normal
+speed and yet reduce detection time drastically. The available detection speeds are **"normal"**(default), **"fast"**, **"faster"** , **"fastest"** and **"flash"**.
+All you need to do is to state the speed mode you desire when loading the model as seen below.
+
+```python
+detector.loadModel(detection_speed="fast")
+```
+
+
+## Hiding/Showing Object Name and Probability
+
+
+**ImageAI** provides options to hide the name of objects detected and/or the percentage probability from being shown on the saved/returned detected image. Using the `detectObjectsFromImage()` and `detectCustomObjectsFromImage()` functions, the parameters `display_object_name` and `display_percentage_probability` can be set to True or False individually. Take a look at the code below:
+
+```python
+detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new_nodetails.jpg"), minimum_percentage_probability=30, display_percentage_probability=False, display_object_name=False)
+```
+
+In the above code, we specified that both the object name and percentage probability should not be shown. As you can see in the result below, both the names of the objects and their individual percentage probability is not shown in the detected image.
+
+
+
+
+## Image Input & Output Types
+
+
+**ImageAI** supports 3 types of inputs which are **file path to image file**(default), **numpy array of image** and **image file stream**
+as well as 2 types of output which are image **file**(default) and numpy **array**.
+This means you can now perform object detection in production applications such as on a web server and system
+ that returns file in any of the above stated formats.
+
+To perform object detection with numpy array or file stream input, you just need to state the input type
+in the `.detectObjectsFromImage()` function or the `.detectCustomObjectsFromImage()` function. See example below.
+
+```python
+detections = detector.detectObjectsFromImage(input_type="array", input_image=image_array , output_image_path=os.path.join(execution_path , "image.jpg")) # For numpy array input type
+detections = detector.detectObjectsFromImage(input_type="stream", input_image=image_stream , output_image_path=os.path.join(execution_path , "test2new.jpg")) # For file stream input type
+```
+
+To perform object detection with numpy array output you just need to state the output type
+in the `.detectObjectsFromImage()` function or the `.detectCustomObjectsFromImage()` function. See example below.
+
+```python
+detected_image_array, detections = detector.detectObjectsFromImage(output_type="array", input_image="image.jpg" ) # For numpy array output type
+```
+
+
+## Documentation
+
+
+We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:
+
+* Documentation - **English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
+* Documentation - **Chinese Version [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)**
+* Documentation - **French Version [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)**
diff --git a/imageai_tf_deprecated/Detection/VIDEO.md b/imageai_tf_deprecated/Detection/VIDEO.md
new file mode 100644
index 00000000..3abc92cc
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/VIDEO.md
@@ -0,0 +1,426 @@
+# ImageAI : Video Object Detection, Tracking and Analysis
+
+A **DeepQuest AI** project [https://deepquestai.com](https://deepquestai.com)
+
+---
+
+## TABLE OF CONTENTS
+
+- :white_square_button: First Video Object Detection
+- :white_square_button: Custom Video Object Detection (Object Tracking)
+- :white_square_button: Camera / Live Stream Video Detection
+- :white_square_button: Video Analysis
+- :white_square_button: Detection Speed
+- :white_square_button: Hiding/Showing Object Name and Probability
+- :white_square_button: Frame Detection Intervals
+- :white_square_button: Video Detection Timeout (NEW)
+- :white_square_button: Documentation
+
+ImageAI provides convenient, flexible and powerful methods to perform object detection on videos. The video object detection class provided only supports RetinaNet, YOLOv3 and TinyYOLOv3. This version of **ImageAI** provides commercial grade video objects detection features, which include but not limited to device/IP camera inputs, per frame, per second, per minute and entire video analysis for storing in databases and/or real-time visualizations and for future insights.
+
+To start performing video object detection, you must download the RetinaNet, YOLOv3 or TinyYOLOv3 object detection model via the links below:
+
+- **[RetinaNet](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/resnet50_coco_best_v2.0.1.h5)** _(Size = 145 mb, high performance and accuracy, with longer detection time)_
+- **[YOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo.h5)** _(Size = 237 mb, moderate performance and accuracy, with a moderate detection time)_
+- **[TinyYOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo-tiny.h5)** _(Size = 34 mb, optimized for speed and moderate performance, with fast detection time)_
+
+Because video object detection is a compute-intensive task, we advise you perform this experiment using a computer with an NVIDIA GPU and the GPU version of Tensorflow installed. Performing video object detection on a CPU will be slower than using an NVIDIA GPU powered computer. You can use Google Colab for this experiment as it has an NVIDIA K80 GPU available for free.
+
+ Once you download the object detection model file, you should copy the model file to the your project folder where your .py files will be.
+ Then create a python file and give it a name; an example is `FirstVideoObjectDetection.py`. Then write the code below into the python file:
+
+
+### FirstVideoObjectDetection.py
+
+
+```python
+from imageai.Detection import VideoObjectDetection
+import os
+
+execution_path = os.getcwd()
+
+detector = VideoObjectDetection()
+detector.setModelTypeAsRetinaNet()
+detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
+detector.loadModel()
+
+video_path = detector.detectObjectsFromVideo(input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_detected")
+ , frames_per_second=20, log_progress=True)
+print(video_path)
+```
+
+
+Input Video (a 1min 24seconds video)
+
+[](https://github.com/OlafenwaMoses/ImageAI/blob/master/data-videos/traffic.mp4)
+
+Output Video
+[](https://www.youtube.com/embed/qplVDqOmElI?rel=0)
+
+Let us make a breakdown of the object detection code that we used above.
+
+```python
+from imageai.Detection import VideoObjectDetection
+import os
+
+execution_path = os.getcwd()
+```
+
+ In the 3 lines above, we import the **ImageAI video object detection** class in the first line, import the **os** module in the second line and obtain
+ the path to folder where our python file runs.
+
+```python
+detector = VideoObjectDetection()
+detector.setModelTypeAsRetinaNet()
+detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
+detector.loadModel()
+```
+
+In the 4 lines above, we created a new instance of the **VideoObjectDetection** class in the first line, set the model type to RetinaNet in the second line, set the model path to the RetinaNet model file we downloaded and copied to the python file folder in the third line and load the model in the fourth line.
+
+```python
+video_path = detector.detectObjectsFromVideo(input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_detected"),
+ frames_per_second=20, log_progress=True)
+print(video_path)
+```
+
+In the 2 lines above, we ran the `detectObjectsFromVideo()` function and passed in the path to our video, the path to the new video (without the extension, it saves a .avi video by default) which the function will save, the number of frames per second (fps) that we desire the output video to have and the option to log the progress of the detection in the console. Then the function returns the path to the saved video which contains boxes and percentage probabilities rendered on objects detected in the video.
+
+
+### Custom Video Object Detection
+
+
+The video object detection model (**RetinaNet**) supported by **ImageAI** can detect 80 different types of objects. They include:
+```
+ person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop_sign,
+ parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,
+ giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,
+ sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket,
+ bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,
+ broccoli, carrot, hot dog, pizza, donut, cake, chair, couch, potted plant, bed,
+ dining table, toilet, tv, laptop, mouse, remote, keyboard, cell phone, microwave,
+ oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair dryer,
+ toothbrush.
+```
+
+
+Interestingly, **ImageAI** allows you to perform detection for one or more of the items above. That means you can customize the type of object(s) you want to be detected in the video. Let's take a look at the code below:
+
+```python
+from imageai.Detection import VideoObjectDetection
+import os
+
+execution_path = os.getcwd()
+
+detector = VideoObjectDetection()
+detector.setModelTypeAsRetinaNet()
+detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
+detector.loadModel()
+
+custom_objects = detector.CustomObjects(person=True, bicycle=True, motorcycle=True)
+
+video_path = detector.detectCustomObjectsFromVideo(
+ custom_objects=custom_objects,
+ input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_custom_detected"),
+ frames_per_second=20, log_progress=True)
+print(video_path)
+```
+
+Let us take a look at the part of the code that made this possible.
+
+```python
+custom_objects = detector.CustomObjects(person=True, bicycle=True, motorcycle=True)
+
+video_path = detector.detectCustomObjectsFromVideo(
+ custom_objects=custom_objects,
+ input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_custom_detected"),
+ frames_per_second=20, log_progress=True)
+```
+
+In the above code, after loading the model (can be done before loading the model as well), we defined a new variable
+`custom_objects = detector.CustomObjects()`, in which we set its person, bicycle and motorcycle properties equal to **True**.
+This is to tell the model to detect only the object we set to True. Then we call the `detector.detectCustomObjectsFromVideo()`
+which is the function that allows us to perform detection of custom objects. Then we will set the `custom_objects` value
+ to the custom objects variable we defined.
+
+Output Video
+[](https://www.youtube.com/embed/YfAycAzkwPM?rel=0)
+C:\Users\User\PycharmProjects\ImageAITest\traffic_custom_detected.avi
+
+
+### Camera / Live Stream Video Detection
+
+
+**ImageAI** now allows live-video detection with support for camera inputs. Using **OpenCV**'s `VideoCapture()` function, you can load live-video streams from a device camera, cameras connected by cable or IP cameras, and parse it into **ImageAI**'s `detectObjectsFromVideo()` and `detectCustomObjectsFromVideo()` functions. All features that are supported for detecting objects in a video file is also available for detecting objects in a camera's live-video feed. Find below an example of detecting live-video feed from the device camera.
+
+```python
+from imageai.Detection import VideoObjectDetection
+import os
+import cv2
+
+execution_path = os.getcwd()
+
+
+camera = cv2.VideoCapture(0)
+
+detector = VideoObjectDetection()
+detector.setModelTypeAsRetinaNet()
+detector.setModelPath(os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
+detector.loadModel()
+
+
+video_path = detector.detectObjectsFromVideo(
+ camera_input=camera,
+ output_file_path=os.path.join(execution_path, "camera_detected_video"),
+ frames_per_second=20, log_progress=True, minimum_percentage_probability=40)
+```
+
+The difference in the code above and the code for the detection of a video file is that we defined an **OpenCV VideoCapture** instance and loaded the default device camera into it. Then we parsed the camera we defined into the parameter `camera_input` which replaces the `input_file_path` that is used for video file.
+
+### Video Analysis
+
+
+**ImageAI** now provides commercial-grade video analysis in the Video Object Detection class, for both video file inputs and camera inputs. This feature allows developers to obtain deep insights into any video processed with **ImageAI**. These insights can be visualized in real-time, stored in a NoSQL database for future review or analysis.
+
+For video analysis, the `detectObjectsFromVideo()` and `detectCustomObjectsFromVideo()` now allow you to state your own defined functions which will be executed for every frame, second and/or minute of the video detected, as well as a function that will be executed at the end of a video detection. Once these functions are stated, they will receive raw but comprehensive analytical data on the index of the frame/second/minute, objects detected (name, percentage_probability and box_points), number of instances of each unique object detected and average number of occurrences of each unique object detected over a second/minute and the entire video.
+
+To obtain the video analysis, all you need to do is specify a function, state the corresponding parameters it will be receiving and parse the function name into the `per_frame_function`, `per_second_function`, `per_minute_function` and `video_complete_function` parameters in the detection function. Find below examples of video analysis functions.
+
+```python
+def forFrame(frame_number, output_array, output_count):
+ print("FOR FRAME " , frame_number)
+ print("Output for each object : ", output_array)
+ print("Output count for unique objects : ", output_count)
+ print("------------END OF A FRAME --------------")
+
+def forSeconds(second_number, output_arrays, count_arrays, average_output_count):
+ print("SECOND : ", second_number)
+ print("Array for the outputs of each frame ", output_arrays)
+ print("Array for output count for unique objects in each frame : ", count_arrays)
+ print("Output average count for unique objects in the last second: ", average_output_count)
+ print("------------END OF A SECOND --------------")
+
+def forMinute(minute_number, output_arrays, count_arrays, average_output_count):
+ print("MINUTE : ", minute_number)
+ print("Array for the outputs of each frame ", output_arrays)
+ print("Array for output count for unique objects in each frame : ", count_arrays)
+ print("Output average count for unique objects in the last minute: ", average_output_count)
+ print("------------END OF A MINUTE --------------")
+
+video_detector = VideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath(os.path.join(execution_path, "yolo.h5"))
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(
+ input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_detected"),
+ frames_per_second=10,
+ per_second_function=forSeconds,
+ per_frame_function=forFrame,
+ per_minute_function=forMinute,
+ minimum_percentage_probability=30
+)
+```
+
+When the detection starts on a video feed, be it from a video file or camera input, the result will have the format as below:
+
+**Results for the Frame function**
+```
+FOR FRAME : 1
+
+Output for each object : [{'box_points': (362, 295, 443, 355), 'name': 'boat', 'percentage_probability': 26.666194200515747}, {'box_points': (319, 245, 386, 296), 'name': 'boat', 'percentage_probability': 30.052968859672546}, {'box_points': (219, 308, 341, 358), 'name': 'boat', 'percentage_probability': 47.46982455253601}, {'box_points': (589, 198, 621, 241), 'name': 'bus', 'percentage_probability': 24.62330162525177}, {'box_points': (519, 181, 583, 263), 'name': 'bus', 'percentage_probability': 27.446213364601135}, {'box_points': (493, 197, 561, 272), 'name': 'bus', 'percentage_probability': 59.81815457344055}, {'box_points': (432, 187, 491, 240), 'name': 'bus', 'percentage_probability': 64.42965269088745}, {'box_points': (157, 225, 220, 255), 'name': 'car', 'percentage_probability': 21.150341629981995}, {'box_points': (324, 249, 377, 293), 'name': 'car', 'percentage_probability': 24.089913070201874}, {'box_points': (152, 275, 260, 327), 'name': 'car', 'percentage_probability': 30.341443419456482}, {'box_points': (433, 198, 485, 244), 'name': 'car', 'percentage_probability': 37.205660343170166}, {'box_points': (184, 226, 233, 260), 'name': 'car', 'percentage_probability': 38.52525353431702}, {'box_points': (3, 296, 134, 359), 'name': 'car', 'percentage_probability': 47.80363142490387}, {'box_points': (357, 302, 439, 359), 'name': 'car', 'percentage_probability': 47.94844686985016}, {'box_points': (481, 266, 546, 314), 'name': 'car', 'percentage_probability': 65.8585786819458}, {'box_points': (597, 269, 624, 318), 'name': 'person', 'percentage_probability': 27.125394344329834}]
+
+Output count for unique objects : {'bus': 4, 'boat': 3, 'person': 1, 'car': 8}
+
+------------END OF A FRAME --------------
+```
+
+For any function you parse into the **per_frame_function**, the function will be executed after every single video frame is processed and the following will be parsed into it:
+
+* **Frame Index:** This is the position number of the frame inside the video (e.g 1 for first frame and 20 for twentieth frame).
+* **Output Array:** This is an array of dictionaries. Each dictionary corresponds to each detected object in the image and it contains the "name", "percentage_probability" and "box_points"(x1,y1,x2,y2) values of the object.
+* **Output Count:** This is a dictionary that has the name of each unique object detected as its keys and the number of instances of the objects detected as the values.
+
+**Results for the Second function**
+```
+FOR SECOND : 1
+
+ Array for the outputs of each frame [[{'box_points': (362, 295, 443, 355), 'name': 'boat', 'percentage_probability': 26.666194200515747}, {'box_points': (319, 245, 386, 296), 'name': 'boat', 'percentage_probability': 30.052968859672546}, {'box_points': (219, 308, 341, 358), 'name': 'boat', 'percentage_probability': 47.46982455253601}, {'box_points': (589, 198, 621, 241), 'name': 'bus', 'percentage_probability': 24.62330162525177}, {'box_points': (519, 181, 583, 263), 'name': 'bus', 'percentage_probability': 27.446213364601135}, {'box_points': (493, 197, 561, 272), 'name': 'bus', 'percentage_probability': 59.81815457344055}, {'box_points': (432, 187, 491, 240), 'name': 'bus', 'percentage_probability': 64.42965269088745}, {'box_points': (157, 225, 220, 255), 'name': 'car', 'percentage_probability': 21.150341629981995}, {'box_points': (324, 249, 377, 293), 'name': 'car', 'percentage_probability': 24.089913070201874}, {'box_points': (152, 275, 260, 327), 'name': 'car', 'percentage_probability': 30.341443419456482}, {'box_points': (433, 198, 485, 244), 'name': 'car', 'percentage_probability': 37.205660343170166}, {'box_points': (184, 226, 233, 260), 'name': 'car', 'percentage_probability': 38.52525353431702}, {'box_points': (3, 296, 134, 359), 'name': 'car', 'percentage_probability': 47.80363142490387}, {'box_points': (357, 302, 439, 359), 'name': 'car', 'percentage_probability': 47.94844686985016}, {'box_points': (481, 266, 546, 314), 'name': 'car', 'percentage_probability': 65.8585786819458}, {'box_points': (597, 269, 624, 318), 'name': 'person', 'percentage_probability': 27.125394344329834}],
+ [{'box_points': (316, 240, 384, 302), 'name': 'boat', 'percentage_probability': 29.594269394874573}, {'box_points': (361, 295, 441, 354), 'name': 'boat', 'percentage_probability': 36.11513376235962}, {'box_points': (216, 305, 340, 357), 'name': 'boat', 'percentage_probability': 44.89373862743378}, {'box_points': (432, 198, 488, 244), 'name': 'truck', 'percentage_probability': 22.914741933345795}, {'box_points': (589, 199, 623, 240), 'name': 'bus', 'percentage_probability': 20.545457303524017}, {'box_points': (519, 182, 583, 263), 'name': 'bus', 'percentage_probability': 24.467085301876068}, {'box_points': (492, 197, 563, 271), 'name': 'bus', 'percentage_probability': 61.112016439437866}, {'box_points': (433, 188, 490, 241), 'name': 'bus', 'percentage_probability': 65.08989334106445}, {'box_points': (352, 303, 442, 357), 'name': 'car', 'percentage_probability': 20.025095343589783}, {'box_points': (136, 172, 188, 195), 'name': 'car', 'percentage_probability': 21.571354568004608}, {'box_points': (152, 276, 261, 326), 'name': 'car', 'percentage_probability': 33.07966589927673}, {'box_points': (181, 225, 230, 256), 'name': 'car', 'percentage_probability': 35.111838579177856}, {'box_points': (432, 198, 488, 244), 'name': 'car', 'percentage_probability': 36.25282347202301}, {'box_points': (3, 292, 130, 360), 'name': 'car', 'percentage_probability': 67.55480170249939}, {'box_points': (479, 265, 546, 314), 'name': 'car', 'percentage_probability': 71.47912979125977}, {'box_points': (597, 269, 625, 318), 'name': 'person', 'percentage_probability': 25.903674960136414}],................,
+[{'box_points': (133, 250, 187, 278), 'name': 'umbrella', 'percentage_probability': 21.518094837665558}, {'box_points': (154, 233, 218, 259), 'name': 'umbrella', 'percentage_probability': 23.687003552913666}, {'box_points': (348, 311, 425, 360), 'name': 'boat', 'percentage_probability': 21.015766263008118}, {'box_points': (11, 164, 137, 225), 'name': 'bus', 'percentage_probability': 32.20453858375549}, {'box_points': (424, 187, 485, 243), 'name': 'bus', 'percentage_probability': 38.043853640556335}, {'box_points': (496, 186, 570, 264), 'name': 'bus', 'percentage_probability': 63.83994221687317}, {'box_points': (588, 197, 622, 240), 'name': 'car', 'percentage_probability': 23.51653128862381}, {'box_points': (58, 268, 111, 303), 'name': 'car', 'percentage_probability': 24.538707733154297}, {'box_points': (2, 246, 72, 301), 'name': 'car', 'percentage_probability': 28.433072566986084}, {'box_points': (472, 273, 539, 323), 'name': 'car', 'percentage_probability': 87.17672824859619}, {'box_points': (597, 270, 626, 317), 'name': 'person', 'percentage_probability': 27.459821105003357}]
+ ]
+
+Array for output count for unique objects in each frame : [{'bus': 4, 'boat': 3, 'person': 1, 'car': 8},
+ {'truck': 1, 'bus': 4, 'boat': 3, 'person': 1, 'car': 7},
+ {'bus': 5, 'boat': 2, 'person': 1, 'car': 5},
+ {'bus': 5, 'boat': 1, 'person': 1, 'car': 9},
+ {'truck': 1, 'bus': 2, 'car': 6, 'person': 1},
+ {'truck': 2, 'bus': 4, 'boat': 2, 'person': 1, 'car': 7},
+ {'truck': 1, 'bus': 3, 'car': 7, 'person': 1, 'umbrella': 1},
+ {'bus': 4, 'car': 7, 'person': 1, 'umbrella': 2},
+ {'bus': 3, 'car': 6, 'boat': 1, 'person': 1, 'umbrella': 3},
+ {'bus': 3, 'car': 4, 'boat': 1, 'person': 1, 'umbrella': 2}]
+
+Output average count for unique objects in the last second: {'truck': 0.5, 'bus': 3.7, 'umbrella': 0.8, 'boat': 1.3, 'person': 1.0, 'car': 6.6}
+
+------------END OF A SECOND --------------
+```
+
+In the above result, the video was processed and saved in 10 frames per second (FPS). For any function you parse into the **per_second_function**, the function will be executed after every single second of the video that is processed and the following will be parsed into it:
+
+- **Second Index:** This is the position number of the second inside the video (e.g 1 for first second and 20 for twentieth second).
+- **Output Array:** This is an array of arrays, with each contained array and its position (array index + 1) corresponding to the equivalent frame in the last second of the video (In the above example, there are 10 arrays which correspond to the 10 frames contained in one second). Each contained array contains dictionaries. Each dictionary corresponds to each detected object in the image and it contains the "name", "percentage_probability" and "box_points"(x1,y1,x2,y2) values of the object.
+- **Count arrays:** This is an array of dictionaries. Each dictionary and its position (array index + 1) corresponds to the equivalent frame in the last second of the video. Each dictionary has the name of each unique object detected as its keys and the number of instances of the objects detected as the values.
+- **Average Output Count:** This is a dictionary that has the name of each unique object detected in the last second as its keys and the average number of instances of the objects detected across the number of frames as the values.
+
+**Results for the Minute function**
+The above set of **4 parameters** that are returned for every second of the video processed are the same parameters that will be returned for every minute of the video processed. The difference is that the index returned corresponds to the minute index, the **output_arrays** is an array that contains the number of FPS * 60 number of arrays (in the code example above, 10 frames per second(fps) * 60 seconds = 600 frames = 600 arrays), and the **count_arrays** is an array that contains the number of FPS * 60 number of dictionaries (in the code example above, 10 frames per second(fps) * 60 seconds = 600 frames = 600 dictionaries) and the **average_output_count** is a dictionary that covers all the objects detected in all the frames contained in the last minute.
+
+**Results for the Video Complete Function**
+**ImageAI** allows you to obtain complete analysis of the entire video processed. All you need is to define a function like the forSecond or forMinute function and set the **video_complete_function** parameter into your `.detectObjectsFromVideo()` or `.detectCustomObjectsFromVideo()` function. The same values for the per_second_function and per_minute_function will be returned. The difference is that no index will be returned and the other 3 values will be returned, and the 3 values will cover all frames in the video. Below is a sample function:
+
+```python
+def forFull(output_arrays, count_arrays, average_output_count):
+    pass  # Perform action on the 3 parameters returned into the function
+
+video_detector.detectObjectsFromVideo(
+ input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_detected"),
+ frames_per_second=10,
+ video_complete_function=forFull,
+ minimum_percentage_probability=30
+)
+```
+
+**FINAL NOTE ON VIDEO ANALYSIS** : **ImageAI** allows you to obtain the detected video frame as a Numpy array at each frame, second and minute function. All you need to do is specify one more parameter in your function and set `return_detected_frame=True` in your `detectObjectsFromVideo()` or `detectCustomObjectsFromVideo()` function. Once this is set, the extra parameter you specified in your function will be the Numpy array of the detected frame. See a sample below:
+
+```python
+def forFrame(frame_number, output_array, output_count, detected_frame):
+ print("FOR FRAME " , frame_number)
+ print("Output for each object : ", output_array)
+ print("Output count for unique objects : ", output_count)
+ print("Returned Objects is : ", type(detected_frame))
+ print("------------END OF A FRAME --------------")
+
+video_detector.detectObjectsFromVideo(
+ input_file_path=os.path.join(execution_path, "traffic.mp4"),
+ output_file_path=os.path.join(execution_path, "traffic_detected"),
+ frames_per_second=10,
+ per_frame_function=forFrame,
+ minimum_percentage_probability=30,
+ return_detected_frame=True
+)
+```
+
+### Video Detection Speed
+
+
+**ImageAI** now provides detection speeds for all video object detection tasks. The detection speeds allow you to reduce
+ the time of detection at a rate between 20% - 80%, while still producing accurate detection
+results. Coupled with lowering the **minimum_percentage_probability** parameter, detections can closely match the normal
+speed and yet reduce detection time drastically. The available detection speeds are **"normal"**(default), **"fast"**, **"faster"** , **"fastest"** and **"flash"**.
+All you need to do is to state the speed mode you desire when loading the model as seen below.
+
+```python
+detector.loadModel(detection_speed="fast")
+```
+
+To observe the differences in the detection speeds, look below for each speed applied to object detection,
+ coupled with the adjustment of the minimum_percentage_probability , time taken to detect and detections given.
+The results below are obtained from detections performed on a NVIDIA K80 GPU. Links are provided below to download
+ the videos for each detection speed applied.
+
+Video Length = 1min 24seconds, Detection Speed = "normal" , Minimum Percentage Probability = 50 (default), Detection Time = 29min 3seconds
+[](https://www.youtube.com/embed/qplVDqOmElI?rel=0)
+
+
+**Video Length = 1min 24seconds, Detection Speed = "fast" , Minimum Percentage Probability = 40, Detection Time = 11min 6seconds**
+
+
+**Video Length = 1min 24seconds, Detection Speed = "faster" , Minimum Percentage Probability = 30, Detection Time = 7min 47seconds**
+
+
+**Video Length = 1min 24seconds, Detection Speed = "fastest" , Minimum Percentage Probability = 20, Detection Time = 6min 20seconds**
+
+
+**Video Length = 1min 24seconds, Detection Speed = "flash" , Minimum Percentage Probability = 10, Detection Time = 3min 55seconds**
+
+
+If you use more powerful NVIDIA GPUs, you will definitely have faster detection time than stated above.
+
+### Frame Detection Intervals
+
+
+The above video objects detection task are optimized for frame-real-time object detections that ensures that objects in every frame of the video is detected. **ImageAI** provides you the option to adjust the video frame detections which can speed up your video detection process. When calling the `.detectObjectsFromVideo()` or `.detectCustomObjectsFromVideo()`, you can specify at which frame interval detections should be made. By setting the **frame_detection_interval** parameter to be equal to 5 or 20, that means the object detections in the video will be updated after 5 frames or 20 frames.
+If your output video **frames_per_second** is set to 20, that means the object detections in the video will be updated once in every quarter of a second or every second. This is useful in case scenarios where the available compute is less powerful and speeds of moving objects are low. This ensures you can have objects detected as second-real-time, half-a-second-real-time or whichever way suits your needs. We conducted video object detection on the same input video we have been using all this while by applying a **frame_detection_interval** value equal to 5.
+The results below are obtained from detections performed on a NVIDIA K80 GPU.
+See the results and link to download the videos below:
+
+
+**Video Length = 1min 24seconds, Detection Speed = "normal" , Minimum Percentage Probability = 50 (default), Frame Detection Interval = 5, Detection Time = 15min 49seconds**
+
+
+
+
+**Video Length = 1min 24seconds, Detection Speed = "fast" , Minimum Percentage Probability = 40, Frame Detection Interval = 5, Detection Time = 5min 6seconds**
+
+
+
+
+**Video Length = 1min 24seconds, Detection Speed = "faster" , Minimum Percentage Probability = 30, Frame Detection Interval = 5, Detection Time = 3min 18seconds**
+
+
+
+
+**Video Length = 1min 24seconds, Detection Speed = "fastest" , Minimum Percentage Probability = 20 , Frame Detection Interval = 5, Detection Time = 2min 18seconds**
+[](https://www.youtube.com/embed/S-jgBTQgbd4?rel=0)
+
+
+**Video Length = 1min 24seconds, Detection Speed = "flash" , Minimum Percentage Probability = 10, Frame Detection Interval = 5, Detection Time = 1min 27seconds**
+
+[Download detected video at speed "flash" and interval=5](https://drive.google.com/open?id=1aN2nnVoFjhUWpcz2Und3dsCT9OKrakM0)
+
+
+### Video Detection Timeout
+
+
+**ImageAI** now allows you to set a timeout in seconds for detection of objects in videos or camera live feed.
+To set a timeout for your video detection code, all you need to do is specify the `detection_timeout` parameter in the `detectObjectsFromVideo()` function to the number of desired seconds. In the example code below, we set `detection_timeout` to 120 seconds (2 minutes).
+
+```python
+from imageai.Detection import VideoObjectDetection
+import os
+import cv2
+
+execution_path = os.getcwd()
+camera = cv2.VideoCapture(0)
+
+detector = VideoObjectDetection()
+detector.setModelTypeAsRetinaNet()
+detector.setModelPath(os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
+detector.loadModel()
+
+
+video_path = detector.detectObjectsFromVideo(camera_input=camera,
+ output_file_path=os.path.join(execution_path, "camera_detected_video"),
+ frames_per_second=20,
+ log_progress=True,
+ minimum_percentage_probability=40,
+ detection_timeout=120)
+```
+
+
+### Documentation
+
+
+We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:
+
+- Documentation - **English Version [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
+- Documentation - **Chinese Version [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)**
+- Documentation - **French Version [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)**
+
diff --git a/imageai/Prediction/DenseNet/__init__.py b/imageai_tf_deprecated/Detection/YOLO/__init__.py
similarity index 100%
rename from imageai/Prediction/DenseNet/__init__.py
rename to imageai_tf_deprecated/Detection/YOLO/__init__.py
diff --git a/imageai_tf_deprecated/Detection/YOLO/utils.py b/imageai_tf_deprecated/Detection/YOLO/utils.py
new file mode 100644
index 00000000..fdd8098e
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/YOLO/utils.py
@@ -0,0 +1,363 @@
+import tensorflow as tf
+from keras import backend as K
+import numpy as np
+from PIL import Image
+import cv2
+
+
+def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
+
+ num_anchors = len(anchors)
+
+ anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
+
+ grid_shape = K.shape(feats)[1:3]
+ grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
+ [1, grid_shape[1], 1, 1])
+ grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
+ [grid_shape[0], 1, 1, 1])
+ grid = K.concatenate([grid_x, grid_y])
+ grid = K.cast(grid, K.dtype(feats))
+
+ feats = K.reshape(
+ feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
+
+
+ box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
+ box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
+ box_confidence = K.sigmoid(feats[..., 4:5])
+ box_class_probs = K.sigmoid(feats[..., 5:])
+
+ if calc_loss == True:
+ return grid, feats, box_xy, box_wh
+ return box_xy, box_wh, box_confidence, box_class_probs
+
+
+def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
+
+ box_yx = box_xy[..., ::-1]
+ box_hw = box_wh[..., ::-1]
+ input_shape = K.cast(input_shape, K.dtype(box_yx))
+ image_shape = K.cast(image_shape, K.dtype(box_yx))
+ new_shape = K.round(image_shape * K.min(input_shape/image_shape))
+ offset = (input_shape-new_shape)/2./input_shape
+ scale = input_shape/new_shape
+ box_yx = (box_yx - offset) * scale
+ box_hw *= scale
+
+ box_mins = box_yx - (box_hw / 2.)
+ box_maxes = box_yx + (box_hw / 2.)
+ boxes = K.concatenate([
+ box_mins[..., 0:1],
+ box_mins[..., 1:2],
+ box_maxes[..., 0:1],
+ box_maxes[..., 1:2]
+ ])
+
+
+ boxes *= K.concatenate([image_shape, image_shape])
+ return boxes
+
+
+def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
+
+ box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
+ anchors, num_classes, input_shape)
+ boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
+ boxes = K.reshape(boxes, [-1, 4])
+ box_scores = box_confidence * box_class_probs
+ box_scores = K.reshape(box_scores, [-1, num_classes])
+ return boxes, box_scores
+
+
+def yolo_eval(yolo_outputs,
+ anchors,
+ num_classes,
+ image_shape,
+ max_boxes=20,
+ score_threshold=.6,
+ iou_threshold=.5):
+
+ num_layers = len(yolo_outputs)
+ anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
+ input_shape = K.shape(yolo_outputs[0])[1:3] * 32
+ boxes = []
+ box_scores = []
+ for l in range(num_layers):
+ _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
+ anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
+ boxes.append(_boxes)
+ box_scores.append(_box_scores)
+ boxes = K.concatenate(boxes, axis=0)
+ box_scores = K.concatenate(box_scores, axis=0)
+
+ mask = box_scores >= score_threshold
+ max_boxes_tensor = K.constant(max_boxes, dtype='int32')
+ boxes_ = []
+ scores_ = []
+ classes_ = []
+ for c in range(num_classes):
+ class_boxes = tf.boolean_mask(boxes, mask[:, c])
+ class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
+ nms_index = tf.image.non_max_suppression(
+ class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
+ class_boxes = K.gather(class_boxes, nms_index)
+ class_box_scores = K.gather(class_box_scores, nms_index)
+ classes = K.ones_like(class_box_scores, 'int32') * c
+ boxes_.append(class_boxes)
+ scores_.append(class_box_scores)
+ classes_.append(classes)
+ boxes_ = K.concatenate(boxes_, axis=0)
+ scores_ = K.concatenate(scores_, axis=0)
+ classes_ = K.concatenate(classes_, axis=0)
+
+ return boxes_, scores_, classes_
+
+
+
+def letterbox_image(image, size):
+ iw, ih = image.size
+ w, h = size
+ scale = min(w/iw, h/ih)
+ nw = int(iw*scale)
+ nh = int(ih*scale)
+
+ image = image.resize((nw,nh), Image.BICUBIC)
+ new_image = Image.new('RGB', size, (128,128,128))
+ new_image.paste(image, ((w-nw)//2, (h-nh)//2))
+ return new_image
+
+
+
+
+def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
+ if (float(net_w)/image_w) < (float(net_h)/image_h):
+ new_w = net_w
+ new_h = (image_h*net_w)/image_w
+ else:
+ new_h = net_w
+ new_w = (image_w*net_h)/image_h
+
+ for i in range(len(boxes)):
+ x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
+ y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
+
+ boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
+ boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
+ boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
+ boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)
+
+
+
+class BoundBox:
+ def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
+ self.xmin = xmin
+ self.ymin = ymin
+ self.xmax = xmax
+ self.ymax = ymax
+
+ self.objness = objness
+ self.classes = classes
+
+ self.label = -1
+ self.score = -1
+
+ def get_label(self):
+ if self.label == -1:
+ self.label = np.argmax(self.classes)
+
+ return self.label
+
+ def get_score(self):
+ if self.score == -1:
+ self.score = self.classes[self.get_label()]
+
+ return self.score
+
+
+def _interval_overlap(interval_a, interval_b):
+ x1, x2 = interval_a
+ x3, x4 = interval_b
+
+ if x3 < x1:
+ if x4 < x1:
+ return 0
+ else:
+ return min(x2,x4) - x1
+ else:
+ if x2 < x3:
+ return 0
+ else:
+ return min(x2,x4) - x3
+
+def _sigmoid(x):
+ return 1. / (1. + np.exp(-x))
+
+def bbox_iou(box1, box2):
+ intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
+ intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
+
+ intersect = intersect_w * intersect_h
+
+ w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
+ w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
+
+ union = w1*h1 + w2*h2 - intersect
+
+ return float(intersect) / union
+
+
+def do_nms(boxes, nms_thresh):
+ if len(boxes) > 0:
+ nb_class = len(boxes[0].classes)
+ else:
+ return
+
+ for c in range(nb_class):
+ sorted_indices = np.argsort([-box.classes[c] for box in boxes])
+
+ for i in range(len(sorted_indices)):
+ index_i = sorted_indices[i]
+
+ if boxes[index_i].classes[c] == 0: continue
+
+ for j in range(i+1, len(sorted_indices)):
+ index_j = sorted_indices[j]
+
+ if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
+ boxes[index_j].classes[c] = 0
+
+def decode_netout(netout, anchors, obj_thresh, nms_thresh, net_h, net_w):
+ grid_h, grid_w = netout.shape[:2]
+ nb_box = 3
+ netout = netout.reshape((grid_h, grid_w, nb_box, -1))
+ nb_class = netout.shape[-1] - 5
+
+ boxes = []
+
+ netout[..., :2] = _sigmoid(netout[..., :2])
+ netout[..., 4:] = _sigmoid(netout[..., 4:])
+ netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:]
+ netout[..., 5:] *= netout[..., 5:] > obj_thresh
+
+ for i in range(grid_h*grid_w):
+ row = i / grid_w
+ col = i % grid_w
+
+ for b in range(nb_box):
+ # 4th element is objectness score
+ objectness = netout[int(row)][int(col)][b][4]
+ #objectness = netout[..., :4]
+
+ if(objectness.all() <= obj_thresh): continue
+
+ # first 4 elements are x, y, w, and h
+ x, y, w, h = netout[int(row)][int(col)][b][:4]
+
+ x = (col + x) / grid_w # center position, unit: image width
+ y = (row + y) / grid_h # center position, unit: image height
+ w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
+ h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
+
+ # last elements are class probabilities
+ classes = netout[int(row)][col][b][5:]
+
+ box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
+ #box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, None, classes)
+
+ boxes.append(box)
+
+ return boxes
+
+def preprocess_input(image, input_shape):
+ net_h, net_w = input_shape
+ new_h, new_w, _ = image.shape
+
+ # determine the new size of the image
+ if (float(net_w)/new_w) < (float(net_h)/new_h):
+ new_h = int((new_h * net_w)/new_w)
+ new_w = net_w
+ else:
+ new_w = int((new_w * net_h)/new_h)
+ new_h = net_h
+
+ # resize the image to the new size
+ resized = cv2.resize(image[:,:,::-1]/255., (int(new_w), int(new_h)))
+
+ # embed the image into the standard letter box
+ new_image = np.ones((net_h, net_w, 3)) * 0.5
+ new_image[int((net_h-new_h)//2):int((net_h+new_h)//2), int((net_w-new_w)//2):int((net_w+new_w)//2), :] = resized
+ new_image = np.expand_dims(new_image, 0)
+
+ return new_image
+
+def retrieve_yolo_detections(yolo_result, anchors, min_probability, nms_thresh, image_input_size, image_size, labels_dict ):
+
+ boxes = []
+
+ for i in range(len(yolo_result)):
+ # decode the output of the network
+ boxes += decode_netout(yolo_result[i][0],
+ anchors[i],
+ min_probability,
+ nms_thresh,
+ image_input_size[0],
+ image_input_size[1])
+
+ # correct the sizes of the bounding boxes
+ correct_yolo_boxes(boxes, image_size[1], image_size[0], image_input_size[0], image_input_size[1])
+
+ # suppress non-maximal boxes
+ do_nms(boxes, nms_thresh)
+
+ detections = list()
+ for box in boxes:
+ label = -1
+
+ for i in range(len(labels_dict.keys())):
+ if box.classes[i] > min_probability:
+ label = labels_dict[i]
+
+
+ percentage_probability = box.classes[i] * 100
+ xmin = box.xmin
+ ymin = box.ymin
+ xmax = box.xmax
+ ymax = box.ymax
+
+ if xmin < 0:
+ xmin = 0
+
+ if ymin < 0:
+ ymin = 0
+
+ detection = dict()
+ detection["name"] = label
+ detection["percentage_probability"] = percentage_probability
+ detection["box_points"] = [ xmin, ymin, xmax, ymax]
+
+ detections.append(detection)
+
+ return detections
+
+
+def draw_boxes(image, box_points, draw_box, label, percentage_probability, color):
+
+ xmin, ymin, xmax, ymax = box_points
+
+ if draw_box is True:
+ cv2.rectangle(image, (xmin,ymin), (xmax,ymax), color, 2)
+
+ if label is not None:
+ if percentage_probability is None:
+ label = "{}".format(label)
+ else:
+ label = "{} {:.2f}%".format(label, percentage_probability)
+ elif percentage_probability is not None:
+ label = "{:.2f}".format(percentage_probability)
+
+ if label is not None or percentage_probability is not None:
+ cv2.putText(image, label, (xmin, ymin - 13), cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * image.shape[0], (255, 0, 0), 2)
+ cv2.putText(image, label, (xmin, ymin - 13), cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * image.shape[0], (255, 255, 255), 1)
+
+ return image
\ No newline at end of file
diff --git a/imageai_tf_deprecated/Detection/YOLO/yolov3.py b/imageai_tf_deprecated/Detection/YOLO/yolov3.py
new file mode 100644
index 00000000..efe8cd71
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/YOLO/yolov3.py
@@ -0,0 +1,361 @@
+from tensorflow.keras.layers import Conv2D, MaxPool2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, LeakyReLU, Lambda
+from tensorflow.keras.layers import LeakyReLU
+from tensorflow.keras.layers import BatchNormalization
+from tensorflow.keras.regularizers import l2
+from tensorflow.keras.models import Model
+from tensorflow.keras import Input
+from tensorflow.keras.layers import add, concatenate
+from tensorflow.keras.layers import Layer
+import tensorflow as tf
+
+
+
+
class YoloLayer(Layer):
    """Keras layer computing the YOLOv3 training loss for one detection scale.

    Called with ``[input_image, y_pred, y_true, true_boxes]`` and returns a
    per-sample scalar loss scaled by ``grid_scale``.  One instance is built
    per output scale (see yolov3_train), each with its own anchor subset.
    """

    def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh,
                 grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale,
                 **kwargs):
        # make the model settings persistent
        self.ignore_thresh = ignore_thresh
        self.warmup_batches = warmup_batches
        # Anchors reshaped to broadcast against [batch, grid_h, grid_w, 3, 2].
        self.anchors = tf.constant(anchors, dtype='float', shape=[1,1,1,3,2])
        self.grid_scale = grid_scale
        self.obj_scale = obj_scale
        self.noobj_scale = noobj_scale
        self.xywh_scale = xywh_scale
        self.class_scale = class_scale

        # make a persistent mesh grid of cell-corner coordinates, sized for the
        # largest grid this scale will ever see; call() slices it down.
        max_grid_h, max_grid_w = max_grid

        cell_x = tf.cast(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)), dtype=tf.float32)
        cell_y = tf.transpose(cell_x, (0,2,1,3,4))
        self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1])

        super(YoloLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        super(YoloLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        input_image, y_pred, y_true, true_boxes = x

        # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class]
        y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0))

        # initialize the masks: 1 where a ground-truth object is assigned
        object_mask = tf.expand_dims(y_true[..., 4], 4)

        # the variable to keep track of number of batches processed
        # NOTE(review): tf.Variable created inside call() — under TF2 eager/
        # function tracing this may be re-created per trace; confirm the
        # warm-up counter behaves as intended.
        batch_seen = tf.Variable(0.)

        # compute grid factor and net factor
        grid_h = tf.shape(y_true)[1]
        grid_w = tf.shape(y_true)[2]
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2])

        net_h = tf.shape(input_image)[1]
        net_w = tf.shape(input_image)[2]
        net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2])

        """
        Adjust prediction
        """
        pred_box_xy = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
        pred_box_wh = y_pred[..., 2:4]                                                       # t_wh
        pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4)                        # adjust confidence
        pred_box_class = y_pred[..., 5:]                                                     # adjust class probabilities

        """
        Adjust ground truth
        """
        true_box_xy = y_true[..., 0:2]  # (sigma(t_xy) + c_xy)
        true_box_wh = y_true[..., 2:4]  # t_wh
        true_box_conf = tf.expand_dims(y_true[..., 4], 4)
        true_box_class = tf.argmax(y_true[..., 5:], -1)

        """
        Compare each predicted box to all true boxes
        """
        # initially, drag all objectness of all boxes to 0
        conf_delta = pred_box_conf - 0

        # then, ignore the boxes which have good overlap with some true box
        true_xy = true_boxes[..., 0:2] / grid_factor
        true_wh = true_boxes[..., 2:4] / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
        pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        # Pairwise IoU between every predicted box and every true box.
        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)

        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        # Zero the no-object confidence penalty for predictions that already
        # overlap some ground truth above ignore_thresh.
        best_ious = tf.reduce_max(iou_scores, axis=4)
        conf_delta *= tf.expand_dims(tf.cast((best_ious < self.ignore_thresh), dtype=tf.float32), 4)

        """
        Compute some online statistics
        """
        true_xy = true_box_xy / grid_factor
        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = pred_box_xy / grid_factor
        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)
        iou_scores = object_mask * tf.expand_dims(iou_scores, 4)

        # Monitoring quantities (recall@50/75, average IoU/objectness); the
        # 1e-3 terms guard against division by zero when a batch has no objects.
        count = tf.reduce_sum(object_mask)
        count_noobj = tf.reduce_sum(1 - object_mask)
        detect_mask = tf.cast((pred_box_conf*object_mask >= 0.5), dtype=tf.float32)
        class_mask = tf.expand_dims(tf.cast(tf.equal(tf.argmax(pred_box_class, -1), true_box_class), dtype=tf.float32), 4)
        recall50 = tf.reduce_sum(tf.cast((iou_scores >= 0.5), dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3)
        recall75 = tf.reduce_sum(tf.cast((iou_scores >= 0.75), dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3)
        avg_iou = tf.reduce_sum(iou_scores) / (count + 1e-3)
        avg_obj = tf.reduce_sum(pred_box_conf * object_mask) / (count + 1e-3)
        avg_noobj = tf.reduce_sum(pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3)
        avg_cat = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3)

        """
        Warm-up training
        """
        batch_seen = tf.compat.v1.assign_add(batch_seen, 1.)

        # During warm-up, push empty cells toward cell-centered boxes so the
        # network learns sane priors before real targets dominate.
        true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1),
                              lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask),
                                       true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask),
                                       tf.ones_like(object_mask)],
                              lambda: [true_box_xy,
                                       true_box_wh,
                                       object_mask])

        """
        Compare each true box to all anchor boxes
        """
        wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
        wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4)  # the smaller the box, the bigger the scale

        xy_delta = xywh_mask * (pred_box_xy-true_box_xy) * wh_scale * self.xywh_scale
        wh_delta = xywh_mask * (pred_box_wh-true_box_wh) * wh_scale * self.xywh_scale
        conf_delta = object_mask * (pred_box_conf-true_box_conf) * self.obj_scale + (1-object_mask) * conf_delta * self.noobj_scale
        class_delta = object_mask * \
                      tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
                      self.class_scale

        # Sum each term over grid, anchor and channel axes (1..4), keeping the
        # batch axis -> one scalar loss per sample.
        loss_xy = tf.reduce_sum(tf.square(xy_delta), list(range(1,5)))
        loss_wh = tf.reduce_sum(tf.square(wh_delta), list(range(1,5)))
        loss_conf = tf.reduce_sum(tf.square(conf_delta), list(range(1,5)))
        loss_class = tf.reduce_sum(class_delta, list(range(1,5)))

        loss = loss_xy + loss_wh + loss_conf + loss_class

        return loss*self.grid_scale

    def compute_output_shape(self, input_shape):
        # One scalar loss value per sample.
        return [(None, 1)]
+
+
+
def dummy_loss(y_true, y_pred):
    """Pass-through Keras loss: the model's YoloLayer outputs already ARE the
    loss, so this simply aggregates y_pred and ignores y_true."""
    total = tf.reduce_sum(y_pred)
    return tf.sqrt(total)
+
def NetworkConv2D_BN_Leaky(input, channels, kernel_size, kernel_regularizer = l2(5e-4), strides=(1,1), padding="same", use_bias=False):
    """Standard Darknet conv block: Conv2D -> BatchNorm -> LeakyReLU(0.1).

    :param input: input tensor.
    :param channels: number of convolution filters.
    :param kernel_size: convolution kernel size tuple.
    :param kernel_regularizer: weight regularizer (L2 5e-4 by default).
    :param strides: convolution strides.
    :param padding: convolution padding mode.
    :param use_bias: whether the conv layer adds a bias (off, BN follows).
    :return: activated output tensor.
    """
    x = Conv2D(filters=channels,
               kernel_size=kernel_size,
               strides=strides,
               padding=padding,
               kernel_regularizer=kernel_regularizer,
               use_bias=use_bias)(input)
    x = BatchNormalization()(x)
    return LeakyReLU(alpha=0.1)(x)
+
def residual_block(input, channels, num_blocks):
    """Darknet residual stage: a stride-2 downsampling conv followed by
    `num_blocks` residual units (1x1 bottleneck -> 3x3 conv -> add).

    :param input: input tensor.
    :param channels: output channel count of the stage.
    :param num_blocks: number of residual units.
    :return: output tensor of the stage.
    """
    # Asymmetric zero padding + "valid" conv reproduces Darknet's
    # left/top-padded stride-2 downsampling.
    x = ZeroPadding2D(((1, 0), (1, 0)))(input)
    x = NetworkConv2D_BN_Leaky(input=x, channels=channels, kernel_size=(3, 3), strides=(2, 2), padding="valid")

    for _ in range(num_blocks):
        shortcut = x
        y = NetworkConv2D_BN_Leaky(input=shortcut, channels=channels // 2, kernel_size=(1, 1))
        y = NetworkConv2D_BN_Leaky(input=y, channels=channels, kernel_size=(3, 3))
        x = Add()([shortcut, y])
    return x
+
def darknet(input):
    """Build the Darknet-53 backbone used by YOLOv3.

    :param input: image input tensor.
    :return: the final 1024-channel feature map.
    """
    x = NetworkConv2D_BN_Leaky(input=input, channels=32, kernel_size=(3, 3))
    # Five residual stages, doubling channels each time: (channels, blocks).
    for stage_channels, stage_blocks in ((64, 1), (128, 2), (256, 8), (512, 8), (1024, 4)):
        x = residual_block(input=x, channels=stage_channels, num_blocks=stage_blocks)
    return x
+
def last_layers(input, channels_in, channels_out, layer_name=""):
    """YOLOv3 output branch: five alternating 1x1/3x3 convs producing a
    feature tensor (fed to the next, upsampled branch) plus a raw-prediction
    head conv.

    :param input: input tensor.
    :param channels_in: bottleneck width (3x3 convs use twice this).
    :param channels_out: channels of the prediction head, anchors*(classes+5).
    :param layer_name: name given to the final prediction conv layer.
    :return: (feature tensor, prediction tensor).
    """
    x = input
    # Alternate 1x1 bottlenecks with 3x3 expansions.
    for kernel, width in (((1, 1), channels_in),
                          ((3, 3), channels_in * 2),
                          ((1, 1), channels_in),
                          ((3, 3), channels_in * 2),
                          ((1, 1), channels_in)):
        x = NetworkConv2D_BN_Leaky(input=x, channels=width, kernel_size=kernel)

    head = NetworkConv2D_BN_Leaky(input=x, channels=channels_in * 2, kernel_size=(3, 3))
    head = Conv2D(filters=channels_out, kernel_size=(1, 1), name=layer_name)(head)

    return x, head
+
def yolov3_base(input, num_anchors, num_classes):
    """Assemble the Darknet-53 backbone plus the three YOLOv3 detection
    branches (coarse "last1" to fine "last3"), wiring in the FPN-style
    upsample + skip-concatenate connections.

    :param input: image input tensor.
    :param num_anchors: anchors per scale (3 for standard YOLOv3).
    :param num_classes: number of object classes.
    :return: (input, branch1, branch2, branch3) raw prediction tensors.
    """

    darknet_network = Model(input, darknet(input))

    network, network_1 = last_layers(darknet_network.output, 512, num_anchors * (num_classes + 5), layer_name="last1")

    network = NetworkConv2D_BN_Leaky( input=network, channels=256, kernel_size=(1,1))
    network = UpSampling2D(2)(network)
    # NOTE(review): layer index 152 is assumed to be the end of the 512-channel
    # backbone stage — it depends on the exact layer count produced by
    # darknet(); confirm if the backbone ever changes.
    network = Concatenate()([network, darknet_network.layers[152].output])

    network, network_2 = last_layers(network, 256, num_anchors * (num_classes + 5), layer_name="last2")

    network = NetworkConv2D_BN_Leaky(input=network, channels=128, kernel_size=(1, 1))
    network = UpSampling2D(2)(network)
    # NOTE(review): same caveat for index 92 (end of the 256-channel stage).
    network = Concatenate()([network, darknet_network.layers[92].output])

    network, network_3 = last_layers(network, 128, num_anchors * (num_classes + 5), layer_name="last3")

    return input, network_1, network_2, network_3
+
def yolov3_main(input, num_anchors, num_classes):
    """Build the full YOLOv3 inference model mapping an image tensor to the
    three raw multi-scale prediction tensors.

    :param input: image input tensor.
    :param num_anchors: anchors per scale.
    :param num_classes: number of object classes.
    :return: a Keras Model with three outputs.
    """
    model_input, branch_1, branch_2, branch_3 = yolov3_base(input, num_anchors, num_classes)
    return Model(model_input, [branch_1, branch_2, branch_3])
+
+
def yolov3_train(num_classes,
                 anchors,
                 max_box_per_image,
                 max_grid,
                 batch_size,
                 warmup_batches,
                 ignore_thresh,
                 grid_scales,
                 obj_scale,
                 noobj_scale,
                 xywh_scale,
                 class_scale):
    """Build the YOLOv3 training and inference models.

    The training model embeds three YoloLayer loss layers (one per scale) so
    its outputs are loss values; the inference model shares the same weights
    and exposes the raw prediction tensors.

    :param num_classes: number of object classes.
    :param anchors: flat list of 18 anchor values (9 w,h pairs, small->large).
    :param max_box_per_image: capacity of the true_boxes input.
    :param max_grid: (h, w) of the largest grid at the coarsest scale.
    :param batch_size: fixed training batch size (baked into YoloLayer grids).
    :param warmup_batches: batches of warm-up before full loss applies.
    :param ignore_thresh: IoU above which no-object penalty is ignored.
    :param grid_scales: per-scale loss multipliers [coarse, mid, fine].
    :param obj_scale/noobj_scale/xywh_scale/class_scale: loss-term weights.
    :return: [train_model, infer_model].
    """

    input_image = Input(shape=(None, None, 3))  # net_h, net_w, 3
    true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))
    true_yolo_1 = Input(shape=(None, None, len(anchors)//6, 4+1+num_classes))  # grid_h, grid_w, nb_anchor, 5+nb_class
    true_yolo_2 = Input(shape=(None, None, len(anchors)//6, 4+1+num_classes))  # grid_h, grid_w, nb_anchor, 5+nb_class
    true_yolo_3 = Input(shape=(None, None, len(anchors)//6, 4+1+num_classes))  # grid_h, grid_w, nb_anchor, 5+nb_class

    _ , network_1, network_2, network_3 = yolov3_base(input_image, len(anchors)//6, num_classes)

    # Coarsest scale: largest anchors (last 6 values), base grid size.
    loss_yolo_1 = YoloLayer(anchors[12:],
                            [1*num for num in max_grid],
                            batch_size,
                            warmup_batches,
                            ignore_thresh,
                            grid_scales[0],
                            obj_scale,
                            noobj_scale,
                            xywh_scale,
                            class_scale)([input_image, network_1, true_yolo_1, true_boxes])

    # Middle scale: middle anchors, grid twice as fine.
    loss_yolo_2 = YoloLayer(anchors[6:12],
                            [2*num for num in max_grid],
                            batch_size,
                            warmup_batches,
                            ignore_thresh,
                            grid_scales[1],
                            obj_scale,
                            noobj_scale,
                            xywh_scale,
                            class_scale)([input_image, network_2, true_yolo_2, true_boxes])

    # Finest scale: smallest anchors (first 6 values), grid four times as fine.
    loss_yolo_3 = YoloLayer(anchors[:6],
                            [4*num for num in max_grid],
                            batch_size,
                            warmup_batches,
                            ignore_thresh,
                            grid_scales[2],
                            obj_scale,
                            noobj_scale,
                            xywh_scale,
                            class_scale)([input_image, network_3, true_yolo_3, true_boxes])

    train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [loss_yolo_1, loss_yolo_2, loss_yolo_3])
    infer_model = Model(input_image, [network_1, network_2, network_3])

    return [train_model, infer_model]
+
+
def tiny_yolov3_main(input, num_anchors, num_classes):
    """Build the Tiny-YOLOv3 inference model: a small conv/maxpool backbone
    with two detection heads (coarse and fine).

    :param input: image input tensor.
    :param num_anchors: anchors per scale.
    :param num_classes: number of object classes.
    :return: a Keras Model with two raw prediction outputs.
    """
    out_channels = num_anchors * (num_classes + 5)

    # Backbone stem: initial 16-channel conv, then maxpool/conv pairs that
    # double the channel count each stage.
    x = NetworkConv2D_BN_Leaky(input=input, channels=16, kernel_size=(3, 3))
    for stage_channels in (32, 64, 128, 256):
        x = MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding="same")(x)
        x = NetworkConv2D_BN_Leaky(input=x, channels=stage_channels, kernel_size=(3, 3))
    skip = x  # 256-channel map reused by the fine-scale head

    # Deep trunk; note the second pool uses stride 1 so resolution is kept.
    y = MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding="same")(skip)
    y = NetworkConv2D_BN_Leaky(input=y, channels=512, kernel_size=(3, 3))
    y = MaxPool2D(pool_size=(2, 2), strides=(1, 1), padding="same")(y)
    y = NetworkConv2D_BN_Leaky(input=y, channels=1024, kernel_size=(3, 3))
    y = NetworkConv2D_BN_Leaky(input=y, channels=256, kernel_size=(1, 1))

    # Coarse-scale detection head.
    head_coarse = NetworkConv2D_BN_Leaky(input=y, channels=512, kernel_size=(3, 3))
    head_coarse = Conv2D(out_channels, kernel_size=(1, 1))(head_coarse)

    # Upsample the trunk and fuse with the backbone skip connection.
    up = NetworkConv2D_BN_Leaky(input=y, channels=128, kernel_size=(1, 1))
    up = UpSampling2D(2)(up)

    head_fine = Concatenate()([up, skip])
    head_fine = NetworkConv2D_BN_Leaky(input=head_fine, channels=256, kernel_size=(3, 3))
    head_fine = Conv2D(out_channels, kernel_size=(1, 1))(head_fine)

    return Model(input, [head_coarse, head_fine])
+
# NOTE(review): this is an exact duplicate of dummy_loss defined earlier in
# this module; the second definition silently shadows the first. Harmless but
# redundant — consider removing one copy.
def dummy_loss(y_true, y_pred):
    # Pass-through loss: YoloLayer outputs already are losses; y_true is ignored.
    return tf.sqrt(tf.reduce_sum(y_pred))
\ No newline at end of file
diff --git a/imageai_tf_deprecated/Detection/__init__.py b/imageai_tf_deprecated/Detection/__init__.py
new file mode 100644
index 00000000..f1fdf4c8
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/__init__.py
@@ -0,0 +1,984 @@
+import cv2
+from imageai.Detection.keras_retinanet import models as retinanet_models
+from imageai.Detection.keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
+from imageai.Detection.keras_retinanet.utils.visualization import draw_box, draw_caption
+import matplotlib.pyplot as plt
+import matplotlib.image as pltimage
+import numpy as np
+import tensorflow as tf
+import os
+from tensorflow.keras import backend as K
+from tensorflow.keras.layers import Input
+from PIL import Image
+import colorsys
+import warnings
+
+from imageai.Detection.YOLO.yolov3 import tiny_yolov3_main, yolov3_main
+from imageai.Detection.YOLO.utils import letterbox_image, yolo_eval, preprocess_input, retrieve_yolo_detections, draw_boxes
+
+
+
+class ObjectDetection:
+ """
+ This is the object detection class for images in the ImageAI library. It provides support for RetinaNet
+ , YOLOv3 and TinyYOLOv3 object detection networks . After instantiating this class, you can set it's properties and
+ make object detections using it's pre-defined functions.
+
+ The following functions are required to be called before object detection can be made
+ * setModelPath()
+ * At least of of the following and it must correspond to the model set in the setModelPath()
+ [setModelTypeAsRetinaNet(), setModelTypeAsYOLOv3(), setModelTypeAsTinyYOLOv3()]
+ * loadModel() [This must be called once only before performing object detection]
+
+ Once the above functions have been called, you can call the detectObjectsFromImage() function of
+ the object detection instance object at anytime to obtain observable objects in any image.
+ """
+
    def __init__(self):
        # Model-type tag set by the setModelTypeAs* methods:
        # "", "retinanet", "yolov3" or "tinyyolov3".
        self.__modelType = ""
        # Path to the model weights file, set via setModelPath().
        self.modelPath = ""
        self.__modelPathAdded = False
        self.__modelLoaded = False
        # Holds the single loaded Keras model at index 0.
        self.__model_collection = []

        # Instance variables for RetinaNet Model
        # NOTE(review): these defaults look swapped relative to loadModel(),
        # where "normal" sets min=800 / max=1333 — confirm intent.
        self.__input_image_min = 1333
        self.__input_image_max = 800

        # COCO class-index -> human-readable name mapping (80 classes).
        self.numbers_to_names = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus',
                                 6: 'train',
                                 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign',
                                 12: 'parking meter',
                                 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
                                 20: 'elephant',
                                 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag',
                                 27: 'tie',
                                 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball',
                                 33: 'kite',
                                 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard',
                                 38: 'tennis racket',
                                 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon',
                                 45: 'bowl',
                                 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot',
                                 52: 'hot dog',
                                 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant',
                                 59: 'bed',
                                 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote',
                                 66: 'keyboard',
                                 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink',
                                 72: 'refrigerator',
                                 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
                                 78: 'hair dryer',
                                 79: 'toothbrush'}

        # Unique instance variables for YOLOv3 and TinyYOLOv3 model
        self.__yolo_iou = 0.45
        self.__yolo_score = 0.1
        self.__nms_thresh = 0.45
        # Anchor (w, h) pairs per scale, coarse -> fine.
        self.__yolo_anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]
        self.__yolo_model_image_size = (416, 416)
        self.__yolo_boxes, self.__yolo_scores, self.__yolo_classes = "", "", ""
        self.__tiny_yolo_anchors = [[81, 82, 135, 169, 344, 319], [10, 14, 23, 27, 37, 58]]
        # BGR color used when drawing detection boxes.
        self.__box_color = (112, 19, 24)
+
+
    def setModelTypeAsRetinaNet(self):
        """
        'setModelTypeAsRetinaNet()' is used to set the model type to the RetinaNet model
        for this object detection instance. Must be called before loadModel().
        :return:
        """
        self.__modelType = "retinanet"
+
    def setModelTypeAsYOLOv3(self):
        """
        'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model
        for this object detection instance. Must be called before loadModel().
        :return:
        """

        self.__modelType = "yolov3"
+
    def setModelTypeAsTinyYOLOv3(self):
        """
        'setModelTypeAsTinyYOLOv3()' is used to set the model type to the TinyYOLOv3 model
        for this object detection instance. Must be called before loadModel().
        :return:
        """

        self.__modelType = "tinyyolov3"
+
+ def setModelPath(self, model_path):
+ """
+ 'setModelPath()' function is required and is used to set the file path to a RetinaNet
+ object detection model trained on the COCO dataset.
+ :param model_path:
+ :return:
+ """
+
+ if (self.__modelPathAdded == False):
+ self.modelPath = model_path
+ self.__modelPathAdded = True
+
    def loadModel(self, detection_speed="normal"):
        """
        'loadModel()' function is required and is used to load the model structure into the program from the file path defined
        in the setModelPath() function. This function receives an optional value which is "detection_speed".
        The value is used to reduce the time it takes to detect objects in an image, down to about a 10% of the normal time,
        with just a slight reduction in the number of objects detected.

        * detection_speed (optional); Acceptable values are "normal", "fast", "faster", "fastest" and "flash"

        :param detection_speed:
        :return:
        """

        # Speed presets only shrink the model's working image size (or the
        # RetinaNet resize bounds); an unrecognized value leaves defaults as-is.
        if (self.__modelType == "retinanet"):
            if (detection_speed == "normal"):
                self.__input_image_min = 800
                self.__input_image_max = 1333
            elif (detection_speed == "fast"):
                self.__input_image_min = 400
                self.__input_image_max = 700
            elif (detection_speed == "faster"):
                self.__input_image_min = 300
                self.__input_image_max = 500
            elif (detection_speed == "fastest"):
                self.__input_image_min = 200
                self.__input_image_max = 350
            elif (detection_speed == "flash"):
                self.__input_image_min = 100
                self.__input_image_max = 250
        elif (self.__modelType == "yolov3"):
            if (detection_speed == "normal"):
                self.__yolo_model_image_size = (416, 416)
            elif (detection_speed == "fast"):
                self.__yolo_model_image_size = (320, 320)
            elif (detection_speed == "faster"):
                self.__yolo_model_image_size = (208, 208)
            elif (detection_speed == "fastest"):
                self.__yolo_model_image_size = (128, 128)
            elif (detection_speed == "flash"):
                self.__yolo_model_image_size = (96, 96)

        elif (self.__modelType == "tinyyolov3"):
            if (detection_speed == "normal"):
                self.__yolo_model_image_size = (832, 832)
            elif (detection_speed == "fast"):
                self.__yolo_model_image_size = (576, 576)
            elif (detection_speed == "faster"):
                self.__yolo_model_image_size = (416, 416)
            elif (detection_speed == "fastest"):
                self.__yolo_model_image_size = (320, 320)
            elif (detection_speed == "flash"):
                self.__yolo_model_image_size = (272, 272)

        # Build the network and load weights exactly once; repeat calls no-op.
        if (self.__modelLoaded == False):
            if (self.__modelType == ""):
                raise ValueError("You must set a valid model type before loading the model.")
            elif (self.__modelType == "retinanet"):
                model = retinanet_models.load_model(self.modelPath, backbone_name='resnet50')
                self.__model_collection.append(model)
                self.__modelLoaded = True
            elif (self.__modelType == "yolov3" or self.__modelType == "tinyyolov3"):

                # Dynamic spatial dims: one graph serves all speed presets.
                input_image = Input(shape=(None, None, 3))

                if self.__modelType == "yolov3":
                    model = yolov3_main(input_image, len(self.__yolo_anchors),
                                        len(self.numbers_to_names.keys()))
                else:
                    model = tiny_yolov3_main(input_image, 3,
                                             len(self.numbers_to_names.keys()))

                model.load_weights(self.modelPath)

                self.__model_collection.append(model)
                self.__modelLoaded = True
+
    def detectObjectsFromImage(self, input_image="", output_image_path="", input_type="file", output_type="file",
                               extract_detected_objects=False, minimum_percentage_probability=50,
                               display_percentage_probability=True, display_object_name=True,
                               display_box=True, thread_safe=False, custom_objects=None):
        """
        'detectObjectsFromImage()' function is used to detect objects observable in the given image path:
            * input_image , which can be a filepath, image numpy array or image file stream
            * output_image_path (only if output_type = file) , file path to the output image that will contain the detection boxes and label, if output_type="file"
            * input_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file", "array" and "stream"
            * output_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file" and "array"
            * extract_detected_objects (optional) , option to save each object detected individually as an image and return an array of the objects' image path.
            * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
            * display_percentage_probability (optional, True by default), option to show or hide the percentage probability of each object in the saved/returned detected image
            * display_object_name (optional, True by default), option to show or hide the name of each object in the saved/returned detected image
            * thread_safe (optional, False by default), enforce the loaded detection model works across all threads if set to true, made possible by forcing all Tensorflow inference to run on the default graph.
            * custom_objects (optional), a dictionary from CustomObjects() restricting which classes are reported.


        The values returned by this function depends on the parameters parsed. The possible values returnable
        are stated as below
        - If extract_detected_objects = False or at its default value and output_type = 'file' or
            at its default value, you must parse in the 'output_image_path' as a string to the path you want
            the detected image to be saved. Then the function will return:
            1. an array of dictionaries, with each dictionary corresponding to the objects
                detected in the image. Each dictionary contains the following property:
                * name (string)
                * percentage_probability (float)
                * box_points (list of x1,y1,x2 and y2 coordinates)

        - If extract_detected_objects = False or at its default value and output_type = 'array' ,
          Then the function will return:

            1. a numpy array of the detected image
            2. an array of dictionaries, with each dictionary corresponding to the objects
                detected in the image. Each dictionary contains the following property:
                * name (string)
                * percentage_probability (float)
                * box_points (list of x1,y1,x2 and y2 coordinates)

        - If extract_detected_objects = True and output_type = 'file' or
            at its default value, you must parse in the 'output_image_path' as a string to the path you want
            the detected image to be saved. Then the function will return:
            1. an array of dictionaries, with each dictionary corresponding to the objects
                detected in the image. Each dictionary contains the following property:
                * name (string)
                * percentage_probability (float)
                * box_points (list of x1,y1,x2 and y2 coordinates)
            2. an array of string paths to the image of each object extracted from the image

        - If extract_detected_objects = True and output_type = 'array', then the function will return:
            1. a numpy array of the detected image
            2. an array of dictionaries, with each dictionary corresponding to the objects
                detected in the image. Each dictionary contains the following property:
                * name (string)
                * percentage_probability (float)
                * box_points (list of x1,y1,x2 and y2 coordinates)
            3. an array of numpy arrays of each object detected in the image


        :param input_image:
        :param output_image_path:
        :param input_type:
        :param output_type:
        :param extract_detected_objects:
        :param minimum_percentage_probability:
        :param display_percentage_probability:
        :param display_object_name:
        :param thread_safe:
        :return image_frame:
        :return output_objects_array:
        :return detected_objects_image_array:
        """

        if (self.__modelLoaded == False):
            raise ValueError("You must call the loadModel() function before making object detection.")
        elif (self.__modelLoaded == True):
            try:

                model_detections = list()
                detections = list()
                image_copy = None

                detected_objects_image_array = []
                # Convert the user-facing percentage to the 0..1 scale the
                # models work with.
                min_probability = minimum_percentage_probability / 100

                if (input_type == "file"):
                    input_image = cv2.imread(input_image)
                elif (input_type == "array"):
                    input_image = np.array(input_image)

                # NOTE(review): both names alias the same array here; drawing
                # on image_copy also marks input_image — confirm intended.
                detected_copy = input_image
                image_copy = input_image

                if (self.__modelType == "yolov3" or self.__modelType == "tinyyolov3"):

                    image_h, image_w, _ = detected_copy.shape
                    detected_copy = preprocess_input(detected_copy, self.__yolo_model_image_size)

                    model = self.__model_collection[0]
                    yolo_result = model.predict(detected_copy)

                    # Decode raw network output into name/probability/box dicts.
                    model_detections = retrieve_yolo_detections(yolo_result,
                                                                self.__yolo_anchors,
                                                                min_probability,
                                                                self.__nms_thresh,
                                                                self.__yolo_model_image_size,
                                                                (image_w, image_h),
                                                                self.numbers_to_names)

                elif (self.__modelType == "retinanet"):
                    detected_copy = preprocess_image(detected_copy)
                    detected_copy, scale = resize_image(detected_copy)

                    model = self.__model_collection[0]
                    boxes, scores, labels = model.predict_on_batch(np.expand_dims(detected_copy, axis=0))

                    # Map box coordinates back to the original image size.
                    boxes /= scale

                    for box, score, label in zip(boxes[0], scores[0], labels[0]):
                        # scores are sorted so we can break
                        if score < min_probability:
                            break

                        detection_dict = dict()
                        detection_dict["name"] = self.numbers_to_names[label]
                        detection_dict["percentage_probability"] = score * 100
                        detection_dict["box_points"] = box.astype(int).tolist()
                        model_detections.append(detection_dict)

                counting = 0
                objects_dir = output_image_path + "-objects"

                # Filter, annotate and optionally extract each detection.
                for detection in model_detections:
                    counting += 1
                    label = detection["name"]
                    percentage_probability = detection["percentage_probability"]
                    box_points = detection["box_points"]

                    if (custom_objects is not None):
                        if (custom_objects[label] != "valid"):
                            continue

                    detections.append(detection)

                    if display_object_name == False:
                        label = None

                    if display_percentage_probability == False:
                        percentage_probability = None

                    image_copy = draw_boxes(image_copy,
                                            box_points,
                                            display_box,
                                            label,
                                            percentage_probability,
                                            self.__box_color)

                    if (extract_detected_objects == True):
                        # NOTE(review): crops are taken from the annotated
                        # image, so earlier boxes/captions may appear inside
                        # later crops — confirm intended.
                        splitted_copy = image_copy.copy()[box_points[1]:box_points[3],
                                        box_points[0]:box_points[2]]
                        if (output_type == "file"):
                            if (os.path.exists(objects_dir) == False):
                                os.mkdir(objects_dir)
                            splitted_image_path = os.path.join(objects_dir,
                                                               detection["name"] + "-" + str(
                                                                   counting) + ".jpg")
                            cv2.imwrite(splitted_image_path, splitted_copy)
                            detected_objects_image_array.append(splitted_image_path)
                        elif (output_type == "array"):
                            detected_objects_image_array.append(splitted_copy)

                if (output_type == "file"):
                    cv2.imwrite(output_image_path, image_copy)

                if (extract_detected_objects == True):
                    if (output_type == "file"):
                        return detections, detected_objects_image_array
                    elif (output_type == "array"):
                        return image_copy, detections, detected_objects_image_array

                else:
                    if (output_type == "file"):
                        return detections
                    elif (output_type == "array"):
                        return image_copy, detections

            except:
                # NOTE(review): this bare except masks the real error (model
                # failure, bad crop, disk issue) behind a generic ValueError,
                # which makes debugging hard — consider re-raising or chaining.
                raise ValueError(
                    "Ensure you specified correct input image, input type, output type and/or output image path ")
+
+ def CustomObjects(self, person=False, bicycle=False, car=False, motorcycle=False, airplane=False,
+ bus=False, train=False, truck=False, boat=False, traffic_light=False, fire_hydrant=False,
+ stop_sign=False,
+ parking_meter=False, bench=False, bird=False, cat=False, dog=False, horse=False, sheep=False,
+ cow=False, elephant=False, bear=False, zebra=False,
+ giraffe=False, backpack=False, umbrella=False, handbag=False, tie=False, suitcase=False,
+ frisbee=False, skis=False, snowboard=False,
+ sports_ball=False, kite=False, baseball_bat=False, baseball_glove=False, skateboard=False,
+ surfboard=False, tennis_racket=False,
+ bottle=False, wine_glass=False, cup=False, fork=False, knife=False, spoon=False, bowl=False,
+ banana=False, apple=False, sandwich=False, orange=False,
+ broccoli=False, carrot=False, hot_dog=False, pizza=False, donut=False, cake=False, chair=False,
+ couch=False, potted_plant=False, bed=False,
+ dining_table=False, toilet=False, tv=False, laptop=False, mouse=False, remote=False,
+ keyboard=False, cell_phone=False, microwave=False,
+ oven=False, toaster=False, sink=False, refrigerator=False, book=False, clock=False, vase=False,
+ scissors=False, teddy_bear=False, hair_dryer=False,
+ toothbrush=False):
+
+ """
+ The 'CustomObjects()' function allows you to handpick the type of objects you want to detect
+ from an image. The objects are pre-initiated in the function variables and predefined as 'False',
+ which you can easily set to true for any number of objects available. This function
+ returns a dictionary which must be parsed into the 'detectCustomObjectsFromImage()'. Detecting
+ custom objects only happens when you call the function 'detectCustomObjectsFromImage()'
+
+
+ * true_values_of_objects (array); Acceptable values are 'True' and False for all object values present
+
+ :param boolean_values:
+ :return: custom_objects_dict
+ """
+
+ custom_objects_dict = {}
+ input_values = [person, bicycle, car, motorcycle, airplane,
+ bus, train, truck, boat, traffic_light, fire_hydrant, stop_sign,
+ parking_meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,
+ giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,
+ sports_ball, kite, baseball_bat, baseball_glove, skateboard, surfboard, tennis_racket,
+ bottle, wine_glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,
+ broccoli, carrot, hot_dog, pizza, donut, cake, chair, couch, potted_plant, bed,
+ dining_table, toilet, tv, laptop, mouse, remote, keyboard, cell_phone, microwave,
+ oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy_bear, hair_dryer,
+ toothbrush]
+ actual_labels = ["person", "bicycle", "car", "motorcycle", "airplane",
+ "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign",
+ "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
+ "zebra",
+ "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
+ "snowboard",
+ "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
+ "tennis racket",
+ "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
+ "orange",
+ "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
+ "bed",
+ "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
+ "microwave",
+ "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
+ "hair dryer",
+ "toothbrush"]
+
+ for input_value, actual_label in zip(input_values, actual_labels):
+ if (input_value == True):
+ custom_objects_dict[actual_label] = "valid"
+ else:
+ custom_objects_dict[actual_label] = "invalid"
+
+ return custom_objects_dict
+
+    def detectCustomObjectsFromImage(self, input_image="", output_image_path="", input_type="file", output_type="file",
+                                     extract_detected_objects=False, minimum_percentage_probability=50,
+                                     display_percentage_probability=True, display_object_name=True,
+                                     display_box=True, thread_safe=False, custom_objects=None):
+
+        """
+        Deprecated alias for 'detectObjectsFromImage()'.
+
+        Emits a DeprecationWarning and forwards every argument unchanged to
+        'detectObjectsFromImage()', which now accepts 'custom_objects' directly.
+        Kept only for backward compatibility; will be removed in a future release.
+        """
+
+        warnings.warn("'detectCustomObjectsFromImage()' function has been deprecated and will be removed in future versions of ImageAI. \n Kindly use 'detectObjectsFromImage()' ",
+                      DeprecationWarning, stacklevel=2)
+
+        # Delegate to the canonical implementation with identical arguments.
+        return self.detectObjectsFromImage(input_image=input_image,
+                                           output_image_path=output_image_path,
+                                           input_type=input_type,
+                                           output_type=output_type,
+                                           extract_detected_objects=extract_detected_objects,
+                                           minimum_percentage_probability=minimum_percentage_probability,
+                                           display_percentage_probability=display_percentage_probability,
+                                           display_object_name=display_object_name,
+                                           display_box=display_box,
+                                           thread_safe=thread_safe,
+                                           custom_objects=custom_objects)
+
+
+class VideoObjectDetection:
+ """
+ This is the object detection class for videos and camera live stream inputs in the ImageAI library. It provides support for RetinaNet,
+ YOLOv3 and TinyYOLOv3 object detection networks. After instantiating this class, you can set it's properties and
+ make object detections using it's pre-defined functions.
+
+ The following functions are required to be called before object detection can be made
+ * setModelPath()
+    * At least one of the following, and it must correspond to the model set in the setModelPath()
+    [setModelTypeAsRetinaNet(), setModelTypeAsYOLOv3(), setModelTypeAsTinyYOLOv3()]
+ * loadModel() [This must be called once only before performing object detection]
+
+ Once the above functions have been called, you can call the detectObjectsFromVideo() function
+ or the detectCustomObjectsFromVideo() of the object detection instance object at anytime to
+ obtain observable objects in any video or camera live stream.
+ """
+
+    def __init__(self):
+        # Model configuration state; populated by the setModelTypeAs*(),
+        # setModelPath() and loadModel() calls that must precede detection.
+        self.__modelType = ""          # "retinanet", "yolov3" or "tinyyolov3"
+        self.modelPath = ""            # file path of the pretrained weights
+        self.__modelPathAdded = False  # guards setModelPath() against re-assignment
+        self.__modelLoaded = False     # guards loadModel() against re-loading
+        self.__detector = None         # per-frame ObjectDetection instance (set in loadModel)
+        self.__input_image_min = 1333  # input resizing bounds
+        self.__input_image_max = 800
+        self.__detection_storage = None
+
+
+        # COCO class index -> human-readable label mapping (80 classes).
+        self.numbers_to_names = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane',
+                                 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light',
+                                 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
+                                 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
+                                 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack',
+                                 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee',
+                                 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite',
+                                 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard',
+                                 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork',
+                                 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple',
+                                 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog',
+                                 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch',
+                                 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv',
+                                 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone',
+                                 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator',
+                                 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
+                                 78: 'hair dryer', 79: 'toothbrush'}
+
+    def setModelTypeAsRetinaNet(self):
+        """
+        'setModelTypeAsRetinaNet()' is used to set the model type to the RetinaNet model
+        for the video object detection instance object.
+        :return:
+        """
+        self.__modelType = "retinanet"
+
+    def setModelTypeAsYOLOv3(self):
+        """
+        'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model
+        for the video object detection instance object.
+        :return:
+        """
+        self.__modelType = "yolov3"
+
+    def setModelTypeAsTinyYOLOv3(self):
+        """
+        'setModelTypeAsTinyYOLOv3()' is used to set the model type to the TinyYOLOv3 model
+        for the video object detection instance object.
+        :return:
+        """
+        self.__modelType = "tinyyolov3"
+
+    def setModelPath(self, model_path):
+        """
+        'setModelPath()' function is required and is used to set the file path to a RetinaNet,
+        YOLOv3 or TinyYOLOv3 object detection model trained on the COCO dataset.
+        Note the path is write-once: calls after the first are silently ignored.
+        :param model_path: path to the pretrained model weights file
+        :return:
+        """
+
+        if (self.__modelPathAdded == False):
+            self.modelPath = model_path
+            self.__modelPathAdded = True
+
+    def loadModel(self, detection_speed="normal"):
+        """
+        'loadModel()' function is required and is used to load the model structure into the program from the file path defined
+        in the setModelPath() function. This function receives an optional value which is "detection_speed".
+        The value is used to reduce the time it takes to detect objects in an image, down to about 10% of the normal time,
+        with just a slight reduction in the number of objects detected.
+
+
+        * detection_speed (optional); Acceptable values are "normal", "fast", "faster", "fastest" and "flash"
+
+        :param detection_speed:
+        :return:
+        """
+
+        if (self.__modelLoaded == False):
+
+            # Video detection delegates per-frame work to an image ObjectDetection
+            # instance configured with the same model type, path and speed.
+            frame_detector = ObjectDetection()
+
+            if (self.__modelType == "retinanet"):
+                frame_detector.setModelTypeAsRetinaNet()
+            elif (self.__modelType == "yolov3"):
+                frame_detector.setModelTypeAsYOLOv3()
+            elif (self.__modelType == "tinyyolov3"):
+                frame_detector.setModelTypeAsTinyYOLOv3()
+            frame_detector.setModelPath(self.modelPath)
+            frame_detector.loadModel(detection_speed)
+            self.__detector = frame_detector
+            self.__modelLoaded = True
+
+
+    def detectObjectsFromVideo(self, input_file_path="", camera_input=None, output_file_path="", frames_per_second=20,
+                               frame_detection_interval=1, minimum_percentage_probability=50, log_progress=False,
+                               display_percentage_probability=True, display_object_name=True, display_box=True, save_detected_video=True,
+                               per_frame_function=None, per_second_function=None, per_minute_function=None,
+                               video_complete_function=None, return_detected_frame=False, detection_timeout = None,
+                               thread_safe=False, custom_objects=None):
+
+        """
+        'detectObjectsFromVideo()' detects objects observable in a video file or a live camera feed,
+        optionally writing an annotated copy of the video and/or reporting detections through callbacks.
+
+        Input source (exactly one is required):
+        * input_file_path : path to the input video file; used when 'camera_input' is None
+        * camera_input : a camera stream (cv2.VideoCapture-like); takes precedence over 'input_file_path'
+
+        Output control:
+        * output_file_path : path (without extension) for the saved '.avi' video; required unless
+          'save_detected_video' is set to False
+        * frames_per_second : frame rate written to the output video; also the number of frames
+          treated as one "second" for the per-second/per-minute callbacks and 'detection_timeout'
+        * frame_detection_interval (optional, 1 by default) : run detection only every Nth frame
+        * minimum_percentage_probability (optional, 50 by default) : minimum confidence for
+          nominating a detected object for output
+        * log_progress (optional) : print the index of each processed frame to the console
+        * display_percentage_probability / display_object_name / display_box (optional) :
+          annotation toggles for the detected video frames
+        * save_detected_video (optional, True by default) : whether to write the annotated video
+
+        Callbacks (all optional):
+        * per_frame_function(frame_index, detections, counts[, frame]) : called for each detected
+          frame; 'detections' is an array of dicts with 'name', 'percentage_probability' and
+          'box_points'; 'counts' maps each unique object name to its instance count in the frame
+        * per_second_function(second_index, frame_detections, frame_counts, average_counts[, frame]) :
+          called after each second's worth of frames; 'frame_detections'/'frame_counts' hold the
+          per-frame values of that second and 'average_counts' averages the instance counts over it
+        * per_minute_function(minute_index, frame_detections, frame_counts, average_counts[, frame]) :
+          same as per_second_function, aggregated over 'frames_per_second * 60' frames
+        * video_complete_function(frame_detections, frame_counts, average_counts) : called once
+          after the whole video has been processed, with averages over all frames
+        * return_detected_frame (optional, False by default) : when True, the annotated numpy frame
+          is passed as the last argument to the per-frame/per-second/per-minute callbacks
+
+        * detection_timeout (optional, None by default) : number of (video) seconds after which
+          processing stops; None means no limit
+        * thread_safe (optional, False by default) : forwarded to the frame detector to force all
+          Tensorflow inference onto the default graph, so the loaded model works across threads
+        * custom_objects (optional) : dict from 'CustomObjects()' restricting which classes are reported
+
+        :return output_video_filepath: path of the saved video (only when 'save_detected_video' is True)
+        """
+
+        if (input_file_path == "" and camera_input == None):
+            raise ValueError(
+                "You must set 'input_file_path' to a valid video file, or set 'camera_input' to a valid camera")
+        elif (save_detected_video == True and output_file_path == ""):
+            raise ValueError(
+                "You must set 'output_video_filepath' to a valid video file name, in which the detected video will be saved. If you don't intend to save the detected video, set 'save_detected_video=False'")
+
+        else:
+            try:
+
+                # Per-frame results, keyed by 1-based frame index.
+                output_frames_dict = {}
+                output_frames_count_dict = {}
+
+                input_video = cv2.VideoCapture(input_file_path)
+                if (camera_input != None):
+                    input_video = camera_input
+
+                output_video_filepath = output_file_path + '.avi'
+
+                # Property ids 3 and 4 are CAP_PROP_FRAME_WIDTH / CAP_PROP_FRAME_HEIGHT.
+                frame_width = int(input_video.get(3))
+                frame_height = int(input_video.get(4))
+                output_video = cv2.VideoWriter(output_video_filepath, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
+                                               frames_per_second,
+                                               (frame_width, frame_height))
+
+                counting = 0
+
+                detection_timeout_count = 0
+                video_frames_count = 0
+
+                while (input_video.isOpened()):
+                    ret, frame = input_video.read()
+
+                    if (ret == True):
+
+                        video_frames_count += 1
+                        if (detection_timeout != None):
+                            # One "second" of timeout == frames_per_second input frames.
+                            if ((video_frames_count % frames_per_second) == 0):
+                                detection_timeout_count += 1
+
+                            if (detection_timeout_count >= detection_timeout):
+                                break
+
+                        output_objects_array = []
+
+                        counting += 1
+
+                        if (log_progress == True):
+                            print("Processing Frame : ", str(counting))
+
+                        detected_copy = frame.copy()
+
+                        check_frame_interval = counting % frame_detection_interval
+
+                        if (counting == 1 or check_frame_interval == 0):
+                            try:
+                                detected_copy, output_objects_array = self.__detector.detectObjectsFromImage(
+                                    input_image=frame, input_type="array", output_type="array",
+                                    minimum_percentage_probability=minimum_percentage_probability,
+                                    display_percentage_probability=display_percentage_probability,
+                                    display_object_name=display_object_name,
+                                    display_box=display_box,
+                                    custom_objects=custom_objects)
+                            except:
+                                # NOTE(review): detection failures on a frame are silently ignored and
+                                # the unannotated frame is kept; this bare except also hides programming
+                                # errors — consider logging or narrowing the exception type.
+                                None
+
+                        output_frames_dict[counting] = output_objects_array
+
+                        # Tally instances per object name for this frame.
+                        output_objects_count = {}
+                        for eachItem in output_objects_array:
+                            eachItemName = eachItem["name"]
+                            try:
+                                output_objects_count[eachItemName] = output_objects_count[eachItemName] + 1
+                            except:
+                                output_objects_count[eachItemName] = 1
+
+                        output_frames_count_dict[counting] = output_objects_count
+
+
+                        if (save_detected_video == True):
+                            output_video.write(detected_copy)
+
+                        if (counting == 1 or check_frame_interval == 0):
+                            if (per_frame_function != None):
+                                if (return_detected_frame == True):
+                                    per_frame_function(counting, output_objects_array, output_objects_count,
+                                                       detected_copy)
+                                elif (return_detected_frame == False):
+                                    per_frame_function(counting, output_objects_array, output_objects_count)
+
+                        if (per_second_function != None):
+                            if (counting != 1 and (counting % frames_per_second) == 0):
+
+                                # Collect the detections of the last frames_per_second frames.
+                                this_second_output_object_array = []
+                                this_second_counting_array = []
+                                this_second_counting = {}
+
+                                for aa in range(counting):
+                                    if (aa >= (counting - frames_per_second)):
+                                        this_second_output_object_array.append(output_frames_dict[aa + 1])
+                                        this_second_counting_array.append(output_frames_count_dict[aa + 1])
+
+                                # Sum the per-frame counts, then average over the second.
+                                for eachCountingDict in this_second_counting_array:
+                                    for eachItem in eachCountingDict:
+                                        try:
+                                            this_second_counting[eachItem] = this_second_counting[eachItem] + \
+                                                                             eachCountingDict[eachItem]
+                                        except:
+                                            this_second_counting[eachItem] = eachCountingDict[eachItem]
+
+                                for eachCountingItem in this_second_counting:
+                                    this_second_counting[eachCountingItem] = int(this_second_counting[eachCountingItem] / frames_per_second)
+
+                                if (return_detected_frame == True):
+                                    per_second_function(int(counting / frames_per_second),
+                                                        this_second_output_object_array, this_second_counting_array,
+                                                        this_second_counting, detected_copy)
+
+                                elif (return_detected_frame == False):
+                                    per_second_function(int(counting / frames_per_second),
+                                                        this_second_output_object_array, this_second_counting_array,
+                                                        this_second_counting)
+
+                        if (per_minute_function != None):
+
+                            if (counting != 1 and (counting % (frames_per_second * 60)) == 0):
+
+                                # Same aggregation as per-second, over a minute's worth of frames.
+                                this_minute_output_object_array = []
+                                this_minute_counting_array = []
+                                this_minute_counting = {}
+
+                                for aa in range(counting):
+                                    if (aa >= (counting - (frames_per_second * 60))):
+                                        this_minute_output_object_array.append(output_frames_dict[aa + 1])
+                                        this_minute_counting_array.append(output_frames_count_dict[aa + 1])
+
+                                for eachCountingDict in this_minute_counting_array:
+                                    for eachItem in eachCountingDict:
+                                        try:
+                                            this_minute_counting[eachItem] = this_minute_counting[eachItem] + \
+                                                                             eachCountingDict[eachItem]
+                                        except:
+                                            this_minute_counting[eachItem] = eachCountingDict[eachItem]
+
+                                for eachCountingItem in this_minute_counting:
+                                    this_minute_counting[eachCountingItem] = int(this_minute_counting[eachCountingItem] / (frames_per_second * 60))
+
+                                if (return_detected_frame == True):
+                                    per_minute_function(int(counting / (frames_per_second * 60)),
+                                                        this_minute_output_object_array, this_minute_counting_array,
+                                                        this_minute_counting, detected_copy)
+
+                                elif (return_detected_frame == False):
+                                    per_minute_function(int(counting / (frames_per_second * 60)),
+                                                        this_minute_output_object_array, this_minute_counting_array,
+                                                        this_minute_counting)
+
+
+                    else:
+                        break
+
+                if (video_complete_function != None):
+
+                    # Aggregate over every processed frame of the video.
+                    this_video_output_object_array = []
+                    this_video_counting_array = []
+                    this_video_counting = {}
+
+                    for aa in range(counting):
+                        this_video_output_object_array.append(output_frames_dict[aa + 1])
+                        this_video_counting_array.append(output_frames_count_dict[aa + 1])
+
+                    for eachCountingDict in this_video_counting_array:
+                        for eachItem in eachCountingDict:
+                            try:
+                                this_video_counting[eachItem] = this_video_counting[eachItem] + \
+                                                                eachCountingDict[eachItem]
+                            except:
+                                this_video_counting[eachItem] = eachCountingDict[eachItem]
+
+                    for eachCountingItem in this_video_counting:
+                        this_video_counting[eachCountingItem] = int(this_video_counting[eachCountingItem] / counting)
+
+                    video_complete_function(this_video_output_object_array, this_video_counting_array,
+                                            this_video_counting)
+
+                input_video.release()
+                output_video.release()
+
+                if (save_detected_video == True):
+                    return output_video_filepath
+
+            except:
+                raise ValueError(
+                    "An error occured. It may be that your input video is invalid. Ensure you specified a proper string value for 'output_file_path' is 'save_detected_video' is not False. "
+                    "Also ensure your per_frame, per_second, per_minute or video_complete_analysis function is properly configured to receive the right parameters. ")
+
+    def CustomObjects(self, person=False, bicycle=False, car=False, motorcycle=False, airplane=False,
+                      bus=False, train=False, truck=False, boat=False, traffic_light=False, fire_hydrant=False,
+                      stop_sign=False,
+                      parking_meter=False, bench=False, bird=False, cat=False, dog=False, horse=False, sheep=False,
+                      cow=False, elephant=False, bear=False, zebra=False,
+                      giraffe=False, backpack=False, umbrella=False, handbag=False, tie=False, suitcase=False,
+                      frisbee=False, skis=False, snowboard=False,
+                      sports_ball=False, kite=False, baseball_bat=False, baseball_glove=False, skateboard=False,
+                      surfboard=False, tennis_racket=False,
+                      bottle=False, wine_glass=False, cup=False, fork=False, knife=False, spoon=False, bowl=False,
+                      banana=False, apple=False, sandwich=False, orange=False,
+                      broccoli=False, carrot=False, hot_dog=False, pizza=False, donut=False, cake=False, chair=False,
+                      couch=False, potted_plant=False, bed=False,
+                      dining_table=False, toilet=False, tv=False, laptop=False, mouse=False, remote=False,
+                      keyboard=False, cell_phone=False, microwave=False,
+                      oven=False, toaster=False, sink=False, refrigerator=False, book=False, clock=False, vase=False,
+                      scissors=False, teddy_bear=False, hair_dryer=False,
+                      toothbrush=False):
+
+        """
+        The 'CustomObjects()' function allows you to handpick the type of objects you want to detect
+        from a video. Each of the 80 COCO object classes is a keyword argument that defaults to
+        'False'; set any of them to 'True' to select that class. The function returns a dictionary
+        mapping each class label to "valid" (selected) or "invalid" (not selected), which must be
+        parsed into 'detectCustomObjectsFromVideo()'. Detecting custom objects only happens when you
+        call the function 'detectCustomObjectsFromVideo()'.
+
+        :param boolean_values: one boolean keyword argument per COCO class (all default to False)
+        :return: custom_objects_dict - dict mapping each label to "valid"/"invalid"
+        """
+
+        custom_objects_dict = {}
+        # Keyword-argument values, in the same order as the COCO labels below.
+        input_values = [person, bicycle, car, motorcycle, airplane,
+                        bus, train, truck, boat, traffic_light, fire_hydrant, stop_sign,
+                        parking_meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,
+                        giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,
+                        sports_ball, kite, baseball_bat, baseball_glove, skateboard, surfboard, tennis_racket,
+                        bottle, wine_glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,
+                        broccoli, carrot, hot_dog, pizza, donut, cake, chair, couch, potted_plant, bed,
+                        dining_table, toilet, tv, laptop, mouse, remote, keyboard, cell_phone, microwave,
+                        oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy_bear, hair_dryer,
+                        toothbrush]
+        # Human-readable labels matching the parameter order (underscores become spaces).
+        actual_labels = ["person", "bicycle", "car", "motorcycle", "airplane",
+                         "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign",
+                         "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
+                         "zebra",
+                         "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
+                         "snowboard",
+                         "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
+                         "tennis racket",
+                         "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
+                         "orange",
+                         "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
+                         "bed",
+                         "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
+                         "microwave",
+                         "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
+                         "hair dryer",
+                         "toothbrush"]
+
+        for input_value, actual_label in zip(input_values, actual_labels):
+            if (input_value == True):
+                custom_objects_dict[actual_label] = "valid"
+            else:
+                custom_objects_dict[actual_label] = "invalid"
+
+        return custom_objects_dict
+
+    def detectCustomObjectsFromVideo(self, input_file_path="", camera_input=None, output_file_path="", frames_per_second=20,
+                                     frame_detection_interval=1, minimum_percentage_probability=50, log_progress=False,
+                                     display_percentage_probability=True, display_object_name=True, display_box=True, save_detected_video=True,
+                                     per_frame_function=None, per_second_function=None, per_minute_function=None,
+                                     video_complete_function=None, return_detected_frame=False, detection_timeout = None,
+                                     thread_safe=False, custom_objects=None):
+
+        """
+        Convenience alias for 'detectObjectsFromVideo()': forwards every argument unchanged,
+        including the 'custom_objects' dictionary produced by 'CustomObjects()', which restricts
+        detection to the selected object classes. See 'detectObjectsFromVideo()' for the full
+        description of all parameters and callbacks.
+        """
+
+        return self.detectObjectsFromVideo(input_file_path=input_file_path,
+                                           camera_input=camera_input,
+                                           output_file_path=output_file_path,
+                                           frames_per_second=frames_per_second,
+                                           frame_detection_interval=frame_detection_interval,
+                                           minimum_percentage_probability=minimum_percentage_probability,
+                                           log_progress=log_progress,
+                                           display_percentage_probability=display_percentage_probability,
+                                           display_object_name=display_object_name,
+                                           display_box=display_box,
+                                           save_detected_video=save_detected_video,
+                                           per_frame_function=per_frame_function,
+                                           per_second_function=per_second_function,
+                                           per_minute_function=per_minute_function,
+                                           video_complete_function=video_complete_function,
+                                           return_detected_frame=return_detected_frame,
+                                           detection_timeout = detection_timeout,
+                                           thread_safe=thread_safe,
+                                           custom_objects=custom_objects)
\ No newline at end of file
diff --git a/imageai/Prediction/InceptionV3/__init__.py b/imageai_tf_deprecated/Detection/keras_retinanet/__init__.py
similarity index 100%
rename from imageai/Prediction/InceptionV3/__init__.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/__init__.py
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/backend/__init__.py b/imageai_tf_deprecated/Detection/keras_retinanet/backend/__init__.py
new file mode 100644
index 00000000..0546e5ed
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/backend/__init__.py
@@ -0,0 +1 @@
+from .backend import * # noqa: F401,F403
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/backend/backend.py b/imageai_tf_deprecated/Detection/keras_retinanet/backend/backend.py
new file mode 100644
index 00000000..129d9652
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/backend/backend.py
@@ -0,0 +1,119 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import tensorflow
+from tensorflow import keras
+
+
+def bbox_transform_inv(boxes, deltas, mean=None, std=None):
+    """ Applies deltas (usually regression results) to boxes (usually anchors).
+
+    Before applying the deltas to the boxes, the normalization that was previously applied (in the generator) has to be removed.
+    The mean and std are the mean and std as applied in the generator. They are unnormalized in this function and then applied to the boxes.
+
+    Args
+        boxes : np.array of shape (B, N, 4), where B is the batch size, N the number of boxes and 4 values for (x1, y1, x2, y2).
+        deltas: np.array of same shape as boxes. These deltas (d_x1, d_y1, d_x2, d_y2) are a factor of the width/height.
+        mean  : The mean value used when computing deltas (defaults to [0, 0, 0, 0]).
+        std   : The standard deviation used when computing deltas (defaults to [0.2, 0.2, 0.2, 0.2]).
+
+    Returns
+        A np.array of the same shape as boxes, but with deltas applied to each box.
+        The mean and std are used during training to normalize the regression values (networks love normalization).
+    """
+    if mean is None:
+        mean = [0, 0, 0, 0]
+    if std is None:
+        std = [0.2, 0.2, 0.2, 0.2]
+
+    width = boxes[:, :, 2] - boxes[:, :, 0]
+    height = boxes[:, :, 3] - boxes[:, :, 1]
+
+    # Un-normalize each delta (delta * std + mean) and scale it by the box
+    # width/height before adding it to the corresponding corner coordinate.
+    x1 = boxes[:, :, 0] + (deltas[:, :, 0] * std[0] + mean[0]) * width
+    y1 = boxes[:, :, 1] + (deltas[:, :, 1] * std[1] + mean[1]) * height
+    x2 = boxes[:, :, 2] + (deltas[:, :, 2] * std[2] + mean[2]) * width
+    y2 = boxes[:, :, 3] + (deltas[:, :, 3] * std[3] + mean[3]) * height
+
+    # Reassemble the four coordinate tensors into (B, N, 4).
+    pred_boxes = keras.backend.stack([x1, y1, x2, y2], axis=2)
+
+    return pred_boxes
+
+
+def shift(shape, stride, anchors):
+    """ Produce shifted anchors based on shape of the map and stride size.
+
+    Args
+        shape  : Shape to shift the anchors over (feature map height, width).
+        stride : Stride to shift the anchors with over the shape.
+        anchors: The anchors to apply at each location, shape (A, 4).
+
+    Returns
+        A tensor of shape (feat_h * feat_w * A, 4): every base anchor translated
+        to the center of each feature-map cell.
+    """
+    # Cell centers in input-image coordinates (+0.5 offsets to the cell middle).
+    shift_x = (keras.backend.arange(0, shape[1], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride
+    shift_y = (keras.backend.arange(0, shape[0], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride
+
+    shift_x, shift_y = tensorflow.meshgrid(shift_x, shift_y)
+    shift_x = keras.backend.reshape(shift_x, [-1])
+    shift_y = keras.backend.reshape(shift_y, [-1])
+
+    # One (x, y, x, y) translation per cell so both box corners move together.
+    shifts = keras.backend.stack([
+        shift_x,
+        shift_y,
+        shift_x,
+        shift_y
+    ], axis=0)
+
+    shifts = keras.backend.transpose(shifts)
+    number_of_anchors = keras.backend.shape(anchors)[0]
+
+    k = keras.backend.shape(shifts)[0]  # number of base points = feat_h * feat_w
+
+    # Broadcast-add (1, A, 4) + (k, 1, 4) -> (k, A, 4), then flatten to (k * A, 4).
+    shifted_anchors = keras.backend.reshape(anchors, [1, number_of_anchors, 4]) + keras.backend.cast(keras.backend.reshape(shifts, [k, 1, 4]), keras.backend.floatx())
+    shifted_anchors = keras.backend.reshape(shifted_anchors, [k * number_of_anchors, 4])
+
+    return shifted_anchors
+
+
+def map_fn(*args, **kwargs):
+    """ See https://www.tensorflow.org/api_docs/python/tf/map_fn .
+
+    Wrapper around tensorflow.map_fn that additionally accepts a 'shapes'
+    keyword (a list of output shapes paired element-wise with the 'dtype'
+    list) and translates the pair into the 'fn_output_signature' argument
+    introduced in TF 2.3, falling back to the legacy 'dtype' argument on
+    older TensorFlow versions.
+    """
+
+    if "shapes" in kwargs:
+        shapes = kwargs.pop("shapes")
+        dtype = kwargs.pop("dtype")
+        # Build one TensorSpec per output from the paired shape/dtype lists.
+        sig = [tensorflow.TensorSpec(shapes[i], dtype=t) for i, t in
+               enumerate(dtype)]
+
+        # Try to use the new feature fn_output_signature in TF 2.3, use fallback if this is not available
+        try:
+            return tensorflow.map_fn(*args, **kwargs, fn_output_signature=sig)
+        except TypeError:
+            # Older TF rejects fn_output_signature: restore 'dtype' and fall
+            # through to the plain call below.
+            kwargs["dtype"] = dtype
+
+    return tensorflow.map_fn(*args, **kwargs)
+
+
+def resize_images(images, size, method='bilinear', align_corners=False):
+    """ See https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/image/resize_images .
+
+    Compatibility wrapper over the TF1-style resize API.
+
+    Args
+        images        : Batch of images to resize.
+        size          : Target size passed through to the resize op.
+        method        : The method used for interpolation. One of ('bilinear', 'nearest', 'bicubic', 'area').
+        align_corners : Forwarded to tf.compat.v1.image.resize_images.
+    """
+    # Map the string names onto the TF ResizeMethod enum values; an unknown
+    # method raises KeyError here.
+    methods = {
+        'bilinear': tensorflow.image.ResizeMethod.BILINEAR,
+        'nearest' : tensorflow.image.ResizeMethod.NEAREST_NEIGHBOR,
+        'bicubic' : tensorflow.image.ResizeMethod.BICUBIC,
+        'area'    : tensorflow.image.ResizeMethod.AREA,
+    }
+    return tensorflow.compat.v1.image.resize_images(images, size, methods[method], align_corners)
diff --git a/imageai/Prediction/ResNet/__init__.py b/imageai_tf_deprecated/Detection/keras_retinanet/bin/__init__.py
similarity index 100%
rename from imageai/Prediction/ResNet/__init__.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/bin/__init__.py
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/bin/convert_model.py b/imageai_tf_deprecated/Detection/keras_retinanet/bin/convert_model.py
new file mode 100644
index 00000000..381fb157
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/bin/convert_model.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import argparse
+import os
+import sys
+
+# Allow relative imports when being executed as script.
+if __name__ == "__main__" and __package__ is None:
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
+ import keras_retinanet.bin # noqa: F401
+ __package__ = "keras_retinanet.bin"
+
+# Change these to absolute imports if you copy this script outside the keras_retinanet package.
+from .. import models
+from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
+from ..utils.gpu import setup_gpu
+from ..utils.tf_version import check_tf_version
+
+
+def parse_args(args):
+ parser = argparse.ArgumentParser(description='Script for converting a training model to an inference model.')
+
+ parser.add_argument('model_in', help='The model to convert.')
+ parser.add_argument('model_out', help='Path to save the converted model to.')
+ parser.add_argument('--backbone', help='The backbone of the model to convert.', default='resnet50')
+ parser.add_argument('--no-nms', help='Disables non maximum suppression.', dest='nms', action='store_false')
+ parser.add_argument('--no-class-specific-filter', help='Disables class specific filtering.', dest='class_specific_filter', action='store_false')
+ parser.add_argument('--config', help='Path to a configuration parameters .ini file.')
+ parser.add_argument('--nms-threshold', help='Value for non maximum suppression threshold.', type=float, default=0.5)
+ parser.add_argument('--score-threshold', help='Threshold for prefiltering boxes.', type=float, default=0.05)
+ parser.add_argument('--max-detections', help='Maximum number of detections to keep.', type=int, default=300)
+ parser.add_argument('--parallel-iterations', help='Number of batch items to process in parallel.', type=int, default=32)
+
+ return parser.parse_args(args)
+
+
+def main(args=None):
+ # parse arguments
+ if args is None:
+ args = sys.argv[1:]
+ args = parse_args(args)
+
+ # make sure tensorflow is the minimum required version
+ check_tf_version()
+
+ # set modified tf session to avoid using the GPUs
+ setup_gpu('cpu')
+
+ # optionally load config parameters
+ anchor_parameters = None
+ pyramid_levels = None
+ if args.config:
+ args.config = read_config_file(args.config)
+ if 'anchor_parameters' in args.config:
+ anchor_parameters = parse_anchor_parameters(args.config)
+
+ if 'pyramid_levels' in args.config:
+ pyramid_levels = parse_pyramid_levels(args.config)
+
+ # load the model
+ model = models.load_model(args.model_in, backbone_name=args.backbone)
+
+ # check if this is indeed a training model
+ models.check_training_model(model)
+
+ # convert the model
+ model = models.convert_model(
+ model,
+ nms=args.nms,
+ class_specific_filter=args.class_specific_filter,
+ anchor_params=anchor_parameters,
+ pyramid_levels=pyramid_levels,
+ nms_threshold=args.nms_threshold,
+ score_threshold=args.score_threshold,
+ max_detections=args.max_detections,
+ parallel_iterations=args.parallel_iterations
+ )
+
+ # save model
+ model.save(args.model_out)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/bin/debug.py b/imageai_tf_deprecated/Detection/keras_retinanet/bin/debug.py
new file mode 100644
index 00000000..39185a95
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/bin/debug.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python
+
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import argparse
+import os
+import sys
+import cv2
+
+# Set keycodes for changing images
+# 81, 83 are left and right arrows on linux in Ascii code (probably not needed)
+# 65361, 65363 are left and right arrows in linux
+# 2424832, 2555904 are left and right arrows on Windows
+# 110, 109 are 'n' and 'm' on mac, windows, linux
+# (unfortunately arrow keys not picked up on mac)
+leftkeys = (81, 110, 65361, 2424832)
+rightkeys = (83, 109, 65363, 2555904)
+
+# Allow relative imports when being executed as script.
+if __name__ == "__main__" and __package__ is None:
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
+ import keras_retinanet.bin # noqa: F401
+ __package__ = "keras_retinanet.bin"
+
+# Change these to absolute imports if you copy this script outside the keras_retinanet package.
+from ..preprocessing.pascal_voc import PascalVocGenerator
+from ..preprocessing.csv_generator import CSVGenerator
+from ..preprocessing.kitti import KittiGenerator
+from ..preprocessing.open_images import OpenImagesGenerator
+from ..utils.anchors import anchors_for_shape, compute_gt_annotations
+from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
+from ..utils.image import random_visual_effect_generator
+from ..utils.tf_version import check_tf_version
+from ..utils.transform import random_transform_generator
+from ..utils.visualization import draw_annotations, draw_boxes, draw_caption
+
+
+def create_generator(args):
+ """ Create the data generators.
+
+ Args:
+ args: parseargs arguments object.
+ """
+ common_args = {
+ 'config' : args.config,
+ 'image_min_side' : args.image_min_side,
+ 'image_max_side' : args.image_max_side,
+ 'group_method' : args.group_method
+ }
+
+ # create random transform generator for augmenting training data
+ transform_generator = random_transform_generator(
+ min_rotation=-0.1,
+ max_rotation=0.1,
+ min_translation=(-0.1, -0.1),
+ max_translation=(0.1, 0.1),
+ min_shear=-0.1,
+ max_shear=0.1,
+ min_scaling=(0.9, 0.9),
+ max_scaling=(1.1, 1.1),
+ flip_x_chance=0.5,
+ flip_y_chance=0.5,
+ )
+
+ visual_effect_generator = random_visual_effect_generator(
+ contrast_range=(0.9, 1.1),
+ brightness_range=(-.1, .1),
+ hue_range=(-0.05, 0.05),
+ saturation_range=(0.95, 1.05)
+ )
+
+ if args.dataset_type == 'coco':
+ # import here to prevent unnecessary dependency on cocoapi
+ from ..preprocessing.coco import CocoGenerator
+
+ generator = CocoGenerator(
+ args.coco_path,
+ args.coco_set,
+ transform_generator=transform_generator,
+ visual_effect_generator=visual_effect_generator,
+ **common_args
+ )
+ elif args.dataset_type == 'pascal':
+ generator = PascalVocGenerator(
+ args.pascal_path,
+ args.pascal_set,
+ image_extension=args.image_extension,
+ transform_generator=transform_generator,
+ visual_effect_generator=visual_effect_generator,
+ **common_args
+ )
+ elif args.dataset_type == 'csv':
+ generator = CSVGenerator(
+ args.annotations,
+ args.classes,
+ transform_generator=transform_generator,
+ visual_effect_generator=visual_effect_generator,
+ **common_args
+ )
+ elif args.dataset_type == 'oid':
+ generator = OpenImagesGenerator(
+ args.main_dir,
+ subset=args.subset,
+ version=args.version,
+ labels_filter=args.labels_filter,
+ parent_label=args.parent_label,
+ annotation_cache_dir=args.annotation_cache_dir,
+ transform_generator=transform_generator,
+ visual_effect_generator=visual_effect_generator,
+ **common_args
+ )
+ elif args.dataset_type == 'kitti':
+ generator = KittiGenerator(
+ args.kitti_path,
+ subset=args.subset,
+ transform_generator=transform_generator,
+ visual_effect_generator=visual_effect_generator,
+ **common_args
+ )
+ else:
+ raise ValueError('Invalid data type received: {}'.format(args.dataset_type))
+
+ return generator
+
+
+def parse_args(args):
+ """ Parse the arguments.
+ """
+ parser = argparse.ArgumentParser(description='Debug script for a RetinaNet network.')
+ subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type')
+ subparsers.required = True
+
+ coco_parser = subparsers.add_parser('coco')
+ coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).')
+ coco_parser.add_argument('--coco-set', help='Name of the set to show (defaults to val2017).', default='val2017')
+
+ pascal_parser = subparsers.add_parser('pascal')
+ pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).')
+ pascal_parser.add_argument('--pascal-set', help='Name of the set to show (defaults to test).', default='test')
+ pascal_parser.add_argument('--image-extension', help='Declares the dataset images\' extension.', default='.jpg')
+
+ kitti_parser = subparsers.add_parser('kitti')
+ kitti_parser.add_argument('kitti_path', help='Path to dataset directory (ie. /tmp/kitti).')
+ kitti_parser.add_argument('subset', help='Argument for loading a subset from train/val.')
+
+ def csv_list(string):
+ return string.split(',')
+
+ oid_parser = subparsers.add_parser('oid')
+ oid_parser.add_argument('main_dir', help='Path to dataset directory.')
+ oid_parser.add_argument('subset', help='Argument for loading a subset from train/validation/test.')
+ oid_parser.add_argument('--version', help='The current dataset version is v4.', default='v4')
+ oid_parser.add_argument('--labels-filter', help='A list of labels to filter.', type=csv_list, default=None)
+ oid_parser.add_argument('--annotation-cache-dir', help='Path to store annotation cache.', default='.')
+ oid_parser.add_argument('--parent-label', help='Use the hierarchy children of this label.', default=None)
+
+ csv_parser = subparsers.add_parser('csv')
+ csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for evaluation.')
+ csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')
+
+ parser.add_argument('--no-resize', help='Disable image resizing.', dest='resize', action='store_false')
+ parser.add_argument('--anchors', help='Show positive anchors on the image.', action='store_true')
+ parser.add_argument('--display-name', help='Display image name on the bottom left corner.', action='store_true')
+ parser.add_argument('--show-annotations', help='Show annotations on the image. Green annotations have anchors, red annotations don\'t and therefore don\'t contribute to training.', action='store_true')
+ parser.add_argument('--random-transform', help='Randomly transform image and annotations.', action='store_true')
+ parser.add_argument('--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800)
+ parser.add_argument('--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333)
+ parser.add_argument('--config', help='Path to a configuration parameters .ini file.')
+ parser.add_argument('--no-gui', help='Do not open a GUI window. Save images to an output directory instead.', action='store_true')
+ parser.add_argument('--output-dir', help='The output directory to save images to if --no-gui is specified.', default='.')
+ parser.add_argument('--flatten-output', help='Flatten the folder structure of saved output images into a single folder.', action='store_true')
+ parser.add_argument('--group-method', help='Determines how images are grouped together', type=str, default='ratio', choices=['none', 'random', 'ratio'])
+
+ return parser.parse_args(args)
+
+
+def run(generator, args, anchor_params, pyramid_levels):
+ """ Main loop.
+
+ Args
+ generator: The generator to debug.
+ args: parseargs args object.
+ """
+ # display images, one at a time
+ i = 0
+ while True:
+ # load the data
+ image = generator.load_image(i)
+ annotations = generator.load_annotations(i)
+ if len(annotations['labels']) > 0:
+ # apply random transformations
+ if args.random_transform:
+ image, annotations = generator.random_transform_group_entry(image, annotations)
+ image, annotations = generator.random_visual_effect_group_entry(image, annotations)
+
+ # resize the image and annotations
+ if args.resize:
+ image, image_scale = generator.resize_image(image)
+ annotations['bboxes'] *= image_scale
+
+ anchors = anchors_for_shape(image.shape, anchor_params=anchor_params, pyramid_levels=pyramid_levels)
+ positive_indices, _, max_indices = compute_gt_annotations(anchors, annotations['bboxes'])
+
+ # draw anchors on the image
+ if args.anchors:
+ draw_boxes(image, anchors[positive_indices], (255, 255, 0), thickness=1)
+
+ # draw annotations on the image
+ if args.show_annotations:
+ # draw annotations in red
+ draw_annotations(image, annotations, color=(0, 0, 255), label_to_name=generator.label_to_name)
+
+ # draw regressed anchors in green to override most red annotations
+ # result is that annotations without anchors are red, with anchors are green
+ draw_boxes(image, annotations['bboxes'][max_indices[positive_indices], :], (0, 255, 0))
+
+ # display name on the image
+ if args.display_name:
+ draw_caption(image, [0, image.shape[0]], os.path.basename(generator.image_path(i)))
+
+ # write to file and advance if no-gui selected
+ if args.no_gui:
+ output_path = make_output_path(args.output_dir, generator.image_path(i), flatten=args.flatten_output)
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
+ cv2.imwrite(output_path, image)
+ i += 1
+ if i == generator.size(): # have written all images
+ break
+ else:
+ continue
+
+ # if we are using the GUI, then show an image
+ cv2.imshow('Image', image)
+ key = cv2.waitKeyEx()
+
+ # press right for next image and left for previous (linux or windows, doesn't work for macOS)
+ # if you run macOS, press "n" or "m" (will also work on linux and windows)
+
+ if key in rightkeys:
+ i = (i + 1) % generator.size()
+ if key in leftkeys:
+ i -= 1
+ if i < 0:
+ i = generator.size() - 1
+
+ # press q or Esc to quit
+ if (key == ord('q')) or (key == 27):
+ return False
+
+ return True
+
+
+def make_output_path(output_dir, image_path, flatten = False):
+ """ Compute the output path for a debug image. """
+
+ # If the output hierarchy is flattened to a single folder, throw away all leading folders.
+ if flatten:
+ path = os.path.basename(image_path)
+
+ # Otherwise, make sure absolute paths are taken relative to the filesystem root.
+ else:
+ # Make sure to drop drive letters on Windows, otherwise relpath will fail.
+ _, path = os.path.splitdrive(image_path)
+ if os.path.isabs(path):
+ path = os.path.relpath(path, '/')
+
+ # In all cases, append "_debug" to the filename, before the extension.
+ base, extension = os.path.splitext(path)
+ path = base + "_debug" + extension
+
+ # Finally, join the whole thing to the output directory.
+ return os.path.join(output_dir, path)
+
+
+def main(args=None):
+ # parse arguments
+ if args is None:
+ args = sys.argv[1:]
+ args = parse_args(args)
+
+ # make sure tensorflow is the minimum required version
+ check_tf_version()
+
+ # create the generator
+ generator = create_generator(args)
+
+ # optionally load config parameters
+ if args.config:
+ args.config = read_config_file(args.config)
+
+ # optionally load anchor parameters
+ anchor_params = None
+ if args.config and 'anchor_parameters' in args.config:
+ anchor_params = parse_anchor_parameters(args.config)
+
+ pyramid_levels = None
+ if args.config and 'pyramid_levels' in args.config:
+ pyramid_levels = parse_pyramid_levels(args.config)
+ # create the display window if necessary
+ if not args.no_gui:
+ cv2.namedWindow('Image', cv2.WINDOW_NORMAL)
+
+ run(generator, args, anchor_params=anchor_params, pyramid_levels=pyramid_levels)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/bin/evaluate.py b/imageai_tf_deprecated/Detection/keras_retinanet/bin/evaluate.py
new file mode 100644
index 00000000..d0a7f88d
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/bin/evaluate.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python
+
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import argparse
+import os
+import sys
+
+# Allow relative imports when being executed as script.
+if __name__ == "__main__" and __package__ is None:
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
+ import keras_retinanet.bin # noqa: F401
+ __package__ = "keras_retinanet.bin"
+
+# Change these to absolute imports if you copy this script outside the keras_retinanet package.
+from .. import models
+from ..preprocessing.csv_generator import CSVGenerator
+from ..preprocessing.pascal_voc import PascalVocGenerator
+from ..utils.anchors import make_shapes_callback
+from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
+from ..utils.eval import evaluate
+from ..utils.gpu import setup_gpu
+from ..utils.tf_version import check_tf_version
+
+
+def create_generator(args, preprocess_image):
+ """ Create generators for evaluation.
+ """
+ common_args = {
+ 'config' : args.config,
+ 'image_min_side' : args.image_min_side,
+ 'image_max_side' : args.image_max_side,
+ 'no_resize' : args.no_resize,
+ 'preprocess_image' : preprocess_image,
+ 'group_method' : args.group_method
+ }
+
+ if args.dataset_type == 'coco':
+ # import here to prevent unnecessary dependency on cocoapi
+ from ..preprocessing.coco import CocoGenerator
+
+ validation_generator = CocoGenerator(
+ args.coco_path,
+ 'val2017',
+ shuffle_groups=False,
+ **common_args
+ )
+ elif args.dataset_type == 'pascal':
+ validation_generator = PascalVocGenerator(
+ args.pascal_path,
+ 'test',
+ image_extension=args.image_extension,
+ shuffle_groups=False,
+ **common_args
+ )
+ elif args.dataset_type == 'csv':
+ validation_generator = CSVGenerator(
+ args.annotations,
+ args.classes,
+ shuffle_groups=False,
+ **common_args
+ )
+ else:
+ raise ValueError('Invalid data type received: {}'.format(args.dataset_type))
+
+ return validation_generator
+
+
+def parse_args(args):
+ """ Parse the arguments.
+ """
+ parser = argparse.ArgumentParser(description='Evaluation script for a RetinaNet network.')
+ subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type')
+ subparsers.required = True
+
+ coco_parser = subparsers.add_parser('coco')
+ coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).')
+
+ pascal_parser = subparsers.add_parser('pascal')
+ pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).')
+ pascal_parser.add_argument('--image-extension', help='Declares the dataset images\' extension.', default='.jpg')
+
+ csv_parser = subparsers.add_parser('csv')
+ csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for evaluation.')
+ csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')
+
+ parser.add_argument('model', help='Path to RetinaNet model.')
+ parser.add_argument('--convert-model', help='Convert the model to an inference model (ie. the input is a training model).', action='store_true')
+ parser.add_argument('--backbone', help='The backbone of the model.', default='resnet50')
+ parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).')
+ parser.add_argument('--score-threshold', help='Threshold on score to filter detections with (defaults to 0.05).', default=0.05, type=float)
+ parser.add_argument('--iou-threshold', help='IoU Threshold to count for a positive detection (defaults to 0.5).', default=0.5, type=float)
+ parser.add_argument('--max-detections', help='Max Detections per image (defaults to 100).', default=100, type=int)
+ parser.add_argument('--save-path', help='Path for saving images with detections (doesn\'t work for COCO).')
+ parser.add_argument('--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800)
+ parser.add_argument('--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333)
+ parser.add_argument('--no-resize', help='Don\'t rescale the image.', action='store_true')
+ parser.add_argument('--config', help='Path to a configuration parameters .ini file (only used with --convert-model).')
+ parser.add_argument('--group-method', help='Determines how images are grouped together', type=str, default='ratio', choices=['none', 'random', 'ratio'])
+
+ return parser.parse_args(args)
+
+
+def main(args=None):
+ # parse arguments
+ if args is None:
+ args = sys.argv[1:]
+ args = parse_args(args)
+
+ # make sure tensorflow is the minimum required version
+ check_tf_version()
+
+ # optionally choose specific GPU
+ if args.gpu:
+ setup_gpu(args.gpu)
+
+ # make save path if it doesn't exist
+ if args.save_path is not None and not os.path.exists(args.save_path):
+ os.makedirs(args.save_path)
+
+ # optionally load config parameters
+ if args.config:
+ args.config = read_config_file(args.config)
+
+ # create the generator
+ backbone = models.backbone(args.backbone)
+ generator = create_generator(args, backbone.preprocess_image)
+
+ # optionally load anchor parameters
+ anchor_params = None
+ pyramid_levels = None
+ if args.config and 'anchor_parameters' in args.config:
+ anchor_params = parse_anchor_parameters(args.config)
+ if args.config and 'pyramid_levels' in args.config:
+ pyramid_levels = parse_pyramid_levels(args.config)
+
+ # load the model
+ print('Loading model, this may take a second...')
+ model = models.load_model(args.model, backbone_name=args.backbone)
+ generator.compute_shapes = make_shapes_callback(model)
+
+ # optionally convert the model
+ if args.convert_model:
+ model = models.convert_model(model, anchor_params=anchor_params, pyramid_levels=pyramid_levels)
+
+ # print model summary
+ # print(model.summary())
+
+ # start evaluation
+ if args.dataset_type == 'coco':
+ from ..utils.coco_eval import evaluate_coco
+ evaluate_coco(generator, model, args.score_threshold)
+ else:
+ average_precisions, inference_time = evaluate(
+ generator,
+ model,
+ iou_threshold=args.iou_threshold,
+ score_threshold=args.score_threshold,
+ max_detections=args.max_detections,
+ save_path=args.save_path
+ )
+
+ # print evaluation
+ total_instances = []
+ precisions = []
+ for label, (average_precision, num_annotations) in average_precisions.items():
+ print('{:.0f} instances of class'.format(num_annotations),
+ generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
+ total_instances.append(num_annotations)
+ precisions.append(average_precision)
+
+ if sum(total_instances) == 0:
+ print('No test instances found.')
+ return
+
+ print('Inference time for {:.0f} images: {:.4f}'.format(generator.size(), inference_time))
+
+ print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)))
+ print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances)))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/bin/train.py b/imageai_tf_deprecated/Detection/keras_retinanet/bin/train.py
new file mode 100644
index 00000000..2ec4792f
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/bin/train.py
@@ -0,0 +1,553 @@
+#!/usr/bin/env python
+
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import argparse
+import os
+import sys
+import warnings
+
+from tensorflow import keras
+import tensorflow as tf
+
+# Allow relative imports when being executed as script.
+if __name__ == "__main__" and __package__ is None:
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
+ import keras_retinanet.bin # noqa: F401
+ __package__ = "keras_retinanet.bin"
+
+# Change these to absolute imports if you copy this script outside the keras_retinanet package.
+from .. import layers # noqa: F401
+from .. import losses
+from .. import models
+from ..callbacks import RedirectModel
+from ..callbacks.eval import Evaluate
+from ..models.retinanet import retinanet_bbox
+from ..preprocessing.csv_generator import CSVGenerator
+from ..preprocessing.kitti import KittiGenerator
+from ..preprocessing.open_images import OpenImagesGenerator
+from ..preprocessing.pascal_voc import PascalVocGenerator
+from ..utils.anchors import make_shapes_callback
+from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
+from ..utils.gpu import setup_gpu
+from ..utils.image import random_visual_effect_generator
+from ..utils.model import freeze as freeze_model
+from ..utils.tf_version import check_tf_version
+from ..utils.transform import random_transform_generator
+
+
+def makedirs(path):
+ # Intended behavior: try to create the directory,
+ # pass if the directory exists already, fails otherwise.
+ # Meant for Python 2.7/3.n compatibility.
+ try:
+ os.makedirs(path)
+ except OSError:
+ if not os.path.isdir(path):
+ raise
+
+
+def model_with_weights(model, weights, skip_mismatch):
+ """ Load weights for model.
+
+ Args
+ model : The model to load weights for.
+ weights : The weights to load.
+ skip_mismatch : If True, skips layers whose shape of weights doesn't match with the model.
+ """
+ if weights is not None:
+ model.load_weights(weights, by_name=True, skip_mismatch=skip_mismatch)
+ return model
+
+
+def create_models(backbone_retinanet, num_classes, weights, multi_gpu=0,
+ freeze_backbone=False, lr=1e-5, optimizer_clipnorm=0.001, config=None):
+ """ Creates three models (model, training_model, prediction_model).
+
+ Args
+ backbone_retinanet : A function to call to create a retinanet model with a given backbone.
+ num_classes : The number of classes to train.
+ weights : The weights to load into the model.
+ multi_gpu : The number of GPUs to use for training.
+ freeze_backbone : If True, disables learning for the backbone.
+ config : Config parameters, None indicates the default configuration.
+
+ Returns
+ model : The base model. This is also the model that is saved in snapshots.
+ training_model : The training model. If multi_gpu=0, this is identical to model.
+ prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS).
+ """
+
+ modifier = freeze_model if freeze_backbone else None
+
+ # load anchor parameters, or pass None (so that defaults will be used)
+ anchor_params = None
+ num_anchors = None
+ pyramid_levels = None
+ if config and 'anchor_parameters' in config:
+ anchor_params = parse_anchor_parameters(config)
+ num_anchors = anchor_params.num_anchors()
+ if config and 'pyramid_levels' in config:
+ pyramid_levels = parse_pyramid_levels(config)
+
+ # Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing, and to prevent OOM errors.
+ # optionally wrap in a parallel model
+ if multi_gpu > 1:
+ from keras.utils import multi_gpu_model
+ with tf.device('/cpu:0'):
+ model = model_with_weights(backbone_retinanet(num_classes, num_anchors=num_anchors, modifier=modifier, pyramid_levels=pyramid_levels), weights=weights, skip_mismatch=True)
+ training_model = multi_gpu_model(model, gpus=multi_gpu)
+ else:
+ model = model_with_weights(backbone_retinanet(num_classes, num_anchors=num_anchors, modifier=modifier, pyramid_levels=pyramid_levels), weights=weights, skip_mismatch=True)
+ training_model = model
+
+ # make prediction model
+ prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params, pyramid_levels=pyramid_levels)
+
+ # compile model
+ training_model.compile(
+ loss={
+ 'regression' : losses.smooth_l1(),
+ 'classification': losses.focal()
+ },
+ optimizer=keras.optimizers.Adam(lr=lr, clipnorm=optimizer_clipnorm)
+ )
+
+ return model, training_model, prediction_model
+
+
+def create_callbacks(model, training_model, prediction_model, validation_generator, args):
+ """ Creates the callbacks to use during training.
+
+ Args
+ model: The base model.
+ training_model: The model that is used for training.
+ prediction_model: The model that should be used for validation.
+ validation_generator: The generator for creating validation data.
+ args: parseargs args object.
+
+ Returns:
+ A list of callbacks used for training.
+ """
+ callbacks = []
+
+ tensorboard_callback = None
+
+ if args.tensorboard_dir:
+ makedirs(args.tensorboard_dir)
+ update_freq = args.tensorboard_freq
+ if update_freq not in ['epoch', 'batch']:
+ update_freq = int(update_freq)
+ tensorboard_callback = keras.callbacks.TensorBoard(
+ log_dir = args.tensorboard_dir,
+ histogram_freq = 0,
+ batch_size = args.batch_size,
+ write_graph = True,
+ write_grads = False,
+ write_images = False,
+ update_freq = update_freq,
+ embeddings_freq = 0,
+ embeddings_layer_names = None,
+ embeddings_metadata = None
+ )
+
+ if args.evaluation and validation_generator:
+ if args.dataset_type == 'coco':
+ from ..callbacks.coco import CocoEval
+
+ # use prediction model for evaluation
+ evaluation = CocoEval(validation_generator, tensorboard=tensorboard_callback)
+ else:
+ evaluation = Evaluate(validation_generator, tensorboard=tensorboard_callback, weighted_average=args.weighted_average)
+ evaluation = RedirectModel(evaluation, prediction_model)
+ callbacks.append(evaluation)
+
+ # save the model
+ if args.snapshots:
+ # ensure directory created first; otherwise h5py will error after epoch.
+ makedirs(args.snapshot_path)
+ checkpoint = keras.callbacks.ModelCheckpoint(
+ os.path.join(
+ args.snapshot_path,
+ '{backbone}_{dataset_type}_{{epoch:02d}}.h5'.format(backbone=args.backbone, dataset_type=args.dataset_type)
+ ),
+ verbose=1,
+ # save_best_only=True,
+ # monitor="mAP",
+ # mode='max'
+ )
+ checkpoint = RedirectModel(checkpoint, model)
+ callbacks.append(checkpoint)
+
+ callbacks.append(keras.callbacks.ReduceLROnPlateau(
+ monitor = 'loss',
+ factor = args.reduce_lr_factor,
+ patience = args.reduce_lr_patience,
+ verbose = 1,
+ mode = 'auto',
+ min_delta = 0.0001,
+ cooldown = 0,
+ min_lr = 0
+ ))
+
+ if args.evaluation and validation_generator:
+ callbacks.append(keras.callbacks.EarlyStopping(
+ monitor = 'mAP',
+ patience = 5,
+ mode = 'max',
+ min_delta = 0.01
+ ))
+
+ if args.tensorboard_dir:
+ callbacks.append(tensorboard_callback)
+
+ return callbacks
+
+
+def create_generators(args, preprocess_image):
+ """ Create generators for training and validation.
+
+ Args
+ args : parseargs object containing configuration for generators.
+ preprocess_image : Function that preprocesses an image for the network.
+ """
+ common_args = {
+ 'batch_size' : args.batch_size,
+ 'config' : args.config,
+ 'image_min_side' : args.image_min_side,
+ 'image_max_side' : args.image_max_side,
+ 'no_resize' : args.no_resize,
+ 'preprocess_image' : preprocess_image,
+ 'group_method' : args.group_method
+ }
+
+ # create random transform generator for augmenting training data
+ if args.random_transform:
+ transform_generator = random_transform_generator(
+ min_rotation=-0.1,
+ max_rotation=0.1,
+ min_translation=(-0.1, -0.1),
+ max_translation=(0.1, 0.1),
+ min_shear=-0.1,
+ max_shear=0.1,
+ min_scaling=(0.9, 0.9),
+ max_scaling=(1.1, 1.1),
+ flip_x_chance=0.5,
+ flip_y_chance=0.5,
+ )
+ visual_effect_generator = random_visual_effect_generator(
+ contrast_range=(0.9, 1.1),
+ brightness_range=(-.1, .1),
+ hue_range=(-0.05, 0.05),
+ saturation_range=(0.95, 1.05)
+ )
+ else:
+ transform_generator = random_transform_generator(flip_x_chance=0.5)
+ visual_effect_generator = None
+
+ if args.dataset_type == 'coco':
+ # import here to prevent unnecessary dependency on cocoapi
+ from ..preprocessing.coco import CocoGenerator
+
+ train_generator = CocoGenerator(
+ args.coco_path,
+ 'train2017',
+ transform_generator=transform_generator,
+ visual_effect_generator=visual_effect_generator,
+ **common_args
+ )
+
+ validation_generator = CocoGenerator(
+ args.coco_path,
+ 'val2017',
+ shuffle_groups=False,
+ **common_args
+ )
+ elif args.dataset_type == 'pascal':
+ train_generator = PascalVocGenerator(
+ args.pascal_path,
+ 'train',
+ image_extension=args.image_extension,
+ transform_generator=transform_generator,
+ visual_effect_generator=visual_effect_generator,
+ **common_args
+ )
+
+ validation_generator = PascalVocGenerator(
+ args.pascal_path,
+ 'val',
+ image_extension=args.image_extension,
+ shuffle_groups=False,
+ **common_args
+ )
+ elif args.dataset_type == 'csv':
+ train_generator = CSVGenerator(
+ args.annotations,
+ args.classes,
+ transform_generator=transform_generator,
+ visual_effect_generator=visual_effect_generator,
+ **common_args
+ )
+
+ if args.val_annotations:
+ validation_generator = CSVGenerator(
+ args.val_annotations,
+ args.classes,
+ shuffle_groups=False,
+ **common_args
+ )
+ else:
+ validation_generator = None
+ elif args.dataset_type == 'oid':
+ train_generator = OpenImagesGenerator(
+ args.main_dir,
+ subset='train',
+ version=args.version,
+ labels_filter=args.labels_filter,
+ annotation_cache_dir=args.annotation_cache_dir,
+ parent_label=args.parent_label,
+ transform_generator=transform_generator,
+ visual_effect_generator=visual_effect_generator,
+ **common_args
+ )
+
+ validation_generator = OpenImagesGenerator(
+ args.main_dir,
+ subset='validation',
+ version=args.version,
+ labels_filter=args.labels_filter,
+ annotation_cache_dir=args.annotation_cache_dir,
+ parent_label=args.parent_label,
+ shuffle_groups=False,
+ **common_args
+ )
+ elif args.dataset_type == 'kitti':
+ train_generator = KittiGenerator(
+ args.kitti_path,
+ subset='train',
+ transform_generator=transform_generator,
+ visual_effect_generator=visual_effect_generator,
+ **common_args
+ )
+
+ validation_generator = KittiGenerator(
+ args.kitti_path,
+ subset='val',
+ shuffle_groups=False,
+ **common_args
+ )
+ else:
+ raise ValueError('Invalid data type received: {}'.format(args.dataset_type))
+
+ return train_generator, validation_generator
+
+
+def check_args(parsed_args):
+ """ Function to check for inherent contradictions within parsed arguments.
+ For example, batch_size < num_gpus
+ Intended to raise errors prior to backend initialisation.
+
+ Args
+ parsed_args: parser.parse_args()
+
+ Returns
+ parsed_args
+ """
+
+ if parsed_args.multi_gpu > 1 and parsed_args.batch_size < parsed_args.multi_gpu:
+ raise ValueError(
+ "Batch size ({}) must be equal to or higher than the number of GPUs ({})".format(parsed_args.batch_size,
+ parsed_args.multi_gpu))
+
+ if parsed_args.multi_gpu > 1 and parsed_args.snapshot:
+ raise ValueError(
+ "Multi GPU training ({}) and resuming from snapshots ({}) is not supported.".format(parsed_args.multi_gpu,
+ parsed_args.snapshot))
+
+ if parsed_args.multi_gpu > 1 and not parsed_args.multi_gpu_force:
+ raise ValueError("Multi-GPU support is experimental, use at own risk! Run with --multi-gpu-force if you wish to continue.")
+
+ if 'resnet' not in parsed_args.backbone:
+ warnings.warn('Using experimental backbone {}. Only resnet50 has been properly tested.'.format(parsed_args.backbone))
+
+ return parsed_args
+
+
+def parse_args(args):
+ """ Parse the arguments.
+ """
+ parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
+ subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type')
+ subparsers.required = True
+
+ coco_parser = subparsers.add_parser('coco')
+ coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).')
+
+ pascal_parser = subparsers.add_parser('pascal')
+ pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).')
+ pascal_parser.add_argument('--image-extension', help='Declares the dataset images\' extension.', default='.jpg')
+
+ kitti_parser = subparsers.add_parser('kitti')
+ kitti_parser.add_argument('kitti_path', help='Path to dataset directory (ie. /tmp/kitti).')
+
+ def csv_list(string):
+ return string.split(',')
+
+ oid_parser = subparsers.add_parser('oid')
+ oid_parser.add_argument('main_dir', help='Path to dataset directory.')
+ oid_parser.add_argument('--version', help='The current dataset version is v4.', default='v4')
+ oid_parser.add_argument('--labels-filter', help='A list of labels to filter.', type=csv_list, default=None)
+ oid_parser.add_argument('--annotation-cache-dir', help='Path to store annotation cache.', default='.')
+ oid_parser.add_argument('--parent-label', help='Use the hierarchy children of this label.', default=None)
+
+ csv_parser = subparsers.add_parser('csv')
+ csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for training.')
+ csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')
+ csv_parser.add_argument('--val-annotations', help='Path to CSV file containing annotations for validation (optional).')
+
+ group = parser.add_mutually_exclusive_group()
+ group.add_argument('--snapshot', help='Resume training from a snapshot.')
+ group.add_argument('--imagenet-weights', help='Initialize the model with pretrained imagenet weights. This is the default behaviour.', action='store_const', const=True, default=True)
+ group.add_argument('--weights', help='Initialize the model with weights from a file.')
+ group.add_argument('--no-weights', help='Don\'t initialize the model with any weights.', dest='imagenet_weights', action='store_const', const=False)
+ parser.add_argument('--backbone', help='Backbone model used by retinanet.', default='resnet50', type=str)
+ parser.add_argument('--batch-size', help='Size of the batches.', default=1, type=int)
+ parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).')
+ parser.add_argument('--multi-gpu', help='Number of GPUs to use for parallel processing.', type=int, default=0)
+ parser.add_argument('--multi-gpu-force', help='Extra flag needed to enable (experimental) multi-gpu support.', action='store_true')
+ parser.add_argument('--initial-epoch', help='Epoch from which to begin the train, useful if resuming from snapshot.', type=int, default=0)
+ parser.add_argument('--epochs', help='Number of epochs to train.', type=int, default=50)
+ parser.add_argument('--steps', help='Number of steps per epoch.', type=int, default=10000)
+ parser.add_argument('--lr', help='Learning rate.', type=float, default=1e-5)
+ parser.add_argument('--optimizer-clipnorm', help='Clipnorm parameter for optimizer.', type=float, default=0.001)
+ parser.add_argument('--snapshot-path', help='Path to store snapshots of models during training (defaults to \'./snapshots\')', default='./snapshots')
+ parser.add_argument('--tensorboard-dir', help='Log directory for Tensorboard output', default='') # default='./logs') => https://github.com/tensorflow/tensorflow/pull/34870
+ parser.add_argument('--tensorboard-freq', help='Update frequency for Tensorboard output. Values \'epoch\', \'batch\' or int', default='epoch')
+ parser.add_argument('--no-snapshots', help='Disable saving snapshots.', dest='snapshots', action='store_false')
+ parser.add_argument('--no-evaluation', help='Disable per epoch evaluation.', dest='evaluation', action='store_false')
+ parser.add_argument('--freeze-backbone', help='Freeze training of backbone layers.', action='store_true')
+ parser.add_argument('--random-transform', help='Randomly transform image and annotations.', action='store_true')
+ parser.add_argument('--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800)
+ parser.add_argument('--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333)
+ parser.add_argument('--no-resize', help='Don\'t rescale the image.', action='store_true')
+ parser.add_argument('--config', help='Path to a configuration parameters .ini file.')
+ parser.add_argument('--weighted-average', help='Compute the mAP using the weighted average of precisions among classes.', action='store_true')
+ parser.add_argument('--compute-val-loss', help='Compute validation loss during training', dest='compute_val_loss', action='store_true')
+ parser.add_argument('--reduce-lr-patience', help='Reduce learning rate after validation loss decreases over reduce_lr_patience epochs', type=int, default=2)
+ parser.add_argument('--reduce-lr-factor', help='When learning rate is reduced due to reduce_lr_patience, multiply by reduce_lr_factor', type=float, default=0.1)
+ parser.add_argument('--group-method', help='Determines how images are grouped together', type=str, default='ratio', choices=['none', 'random', 'ratio'])
+
+ # Fit generator arguments
+ parser.add_argument('--multiprocessing', help='Use multiprocessing in fit_generator.', action='store_true')
+ parser.add_argument('--workers', help='Number of generator workers.', type=int, default=1)
+ parser.add_argument('--max-queue-size', help='Queue length for multiprocessing workers in fit_generator.', type=int, default=10)
+
+ return check_args(parser.parse_args(args))
+
+
+def main(args=None):
+ # parse arguments
+ if args is None:
+ args = sys.argv[1:]
+ args = parse_args(args)
+
+ # create object that stores backbone information
+ backbone = models.backbone(args.backbone)
+
+ # make sure tensorflow is the minimum required version
+ check_tf_version()
+
+ # optionally choose specific GPU
+ if args.gpu is not None:
+ setup_gpu(args.gpu)
+
+ # optionally load config parameters
+ if args.config:
+ args.config = read_config_file(args.config)
+
+ # create the generators
+ train_generator, validation_generator = create_generators(args, backbone.preprocess_image)
+
+ # create the model
+ if args.snapshot is not None:
+ print('Loading model, this may take a second...')
+ model = models.load_model(args.snapshot, backbone_name=args.backbone)
+ training_model = model
+ anchor_params = None
+ pyramid_levels = None
+ if args.config and 'anchor_parameters' in args.config:
+ anchor_params = parse_anchor_parameters(args.config)
+ if args.config and 'pyramid_levels' in args.config:
+ pyramid_levels = parse_pyramid_levels(args.config)
+
+ prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params, pyramid_levels=pyramid_levels)
+ else:
+ weights = args.weights
+ # default to imagenet if nothing else is specified
+ if weights is None and args.imagenet_weights:
+ weights = backbone.download_imagenet()
+
+ print('Creating model, this may take a second...')
+ model, training_model, prediction_model = create_models(
+ backbone_retinanet=backbone.retinanet,
+ num_classes=train_generator.num_classes(),
+ weights=weights,
+ multi_gpu=args.multi_gpu,
+ freeze_backbone=args.freeze_backbone,
+ lr=args.lr,
+ optimizer_clipnorm=args.optimizer_clipnorm,
+ config=args.config
+ )
+
+ # print model summary
+ print(model.summary())
+
+ # this lets the generator compute backbone layer shapes using the actual backbone model
+ if 'vgg' in args.backbone or 'densenet' in args.backbone:
+ train_generator.compute_shapes = make_shapes_callback(model)
+ if validation_generator:
+ validation_generator.compute_shapes = train_generator.compute_shapes
+
+ # create the callbacks
+ callbacks = create_callbacks(
+ model,
+ training_model,
+ prediction_model,
+ validation_generator,
+ args,
+ )
+
+ if not args.compute_val_loss:
+ validation_generator = None
+
+ # start training
+ return training_model.fit_generator(
+ generator=train_generator,
+ steps_per_epoch=args.steps,
+ epochs=args.epochs,
+ verbose=1,
+ callbacks=callbacks,
+ workers=args.workers,
+ use_multiprocessing=args.multiprocessing,
+ max_queue_size=args.max_queue_size,
+ validation_data=validation_generator,
+ initial_epoch=args.initial_epoch
+ )
+
+
+if __name__ == '__main__':
+ main()
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/__init__.py b/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/__init__.py
new file mode 100644
index 00000000..7316c99a
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/__init__.py
@@ -0,0 +1 @@
+from .common import * # noqa: F401,F403
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/coco.py b/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/coco.py
new file mode 100644
index 00000000..3518dd29
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/coco.py
@@ -0,0 +1,65 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from tensorflow import keras
+from ..utils.coco_eval import evaluate_coco
+
+
+class CocoEval(keras.callbacks.Callback):
+ """ Performs COCO evaluation on each epoch.
+ """
+ def __init__(self, generator, tensorboard=None, threshold=0.05):
+ """ CocoEval callback initializer.
+
+ Args
+ generator : The generator used for creating validation data.
+ tensorboard : If given, the results will be written to tensorboard.
+ threshold : The score threshold to use.
+ """
+ self.generator = generator
+ self.threshold = threshold
+ self.tensorboard = tensorboard
+
+ super(CocoEval, self).__init__()
+
+ def on_epoch_end(self, epoch, logs=None):
+ logs = logs or {}
+
+ coco_tag = ['AP @[ IoU=0.50:0.95 | area= all | maxDets=100 ]',
+ 'AP @[ IoU=0.50 | area= all | maxDets=100 ]',
+ 'AP @[ IoU=0.75 | area= all | maxDets=100 ]',
+ 'AP @[ IoU=0.50:0.95 | area= small | maxDets=100 ]',
+ 'AP @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]',
+ 'AP @[ IoU=0.50:0.95 | area= large | maxDets=100 ]',
+ 'AR @[ IoU=0.50:0.95 | area= all | maxDets= 1 ]',
+ 'AR @[ IoU=0.50:0.95 | area= all | maxDets= 10 ]',
+ 'AR @[ IoU=0.50:0.95 | area= all | maxDets=100 ]',
+ 'AR @[ IoU=0.50:0.95 | area= small | maxDets=100 ]',
+ 'AR @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]',
+ 'AR @[ IoU=0.50:0.95 | area= large | maxDets=100 ]']
+ coco_eval_stats = evaluate_coco(self.generator, self.model, self.threshold)
+
+ if coco_eval_stats is not None:
+ for index, result in enumerate(coco_eval_stats):
+ logs[coco_tag[index]] = result
+
+ if self.tensorboard:
+ import tensorflow as tf
+ writer = tf.summary.create_file_writer(self.tensorboard.log_dir)
+ with writer.as_default():
+ for index, result in enumerate(coco_eval_stats):
+ tf.summary.scalar('{}. {}'.format(index + 1, coco_tag[index]), result, step=epoch)
+ writer.flush()
diff --git a/imageai/Detection/keras_retinanet/callbacks/common.py b/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/common.py
similarity index 69%
rename from imageai/Detection/keras_retinanet/callbacks/common.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/callbacks/common.py
index 76e6bf63..1c849bd0 100644
--- a/imageai/Detection/keras_retinanet/callbacks/common.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/common.py
@@ -1,18 +1,19 @@
-import keras.callbacks
+from tensorflow import keras
class RedirectModel(keras.callbacks.Callback):
"""Callback which wraps another callback, but executed on a different model.
- # Arguments
- callback: callback to wrap.
- model: model to use when executing callbacks.
- # Example
- ```python
- model = keras.models.load_model('model.h5')
- model_checkpoint = ModelCheckpoint(filepath='snapshot.h5')
- parallel_model = multi_gpu_model(model, gpus=2)
- parallel_model.fit(X_train, Y_train, callbacks=[RedirectModel(model_checkpoint, model)])
- ```
+
+ ```python
+ model = keras.models.load_model('model.h5')
+ model_checkpoint = ModelCheckpoint(filepath='snapshot.h5')
+ parallel_model = multi_gpu_model(model, gpus=2)
+ parallel_model.fit(X_train, Y_train, callbacks=[RedirectModel(model_checkpoint, model)])
+ ```
+
+ Args
+ callback : callback to wrap.
+ model : model to use when executing callbacks.
"""
def __init__(self,
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/eval.py b/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/eval.py
new file mode 100644
index 00000000..365305a5
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/callbacks/eval.py
@@ -0,0 +1,99 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from tensorflow import keras
+from ..utils.eval import evaluate
+
+
+class Evaluate(keras.callbacks.Callback):
+ """ Evaluation callback for arbitrary datasets.
+ """
+
+ def __init__(
+ self,
+ generator,
+ iou_threshold=0.5,
+ score_threshold=0.05,
+ max_detections=100,
+ save_path=None,
+ tensorboard=None,
+ weighted_average=False,
+ verbose=1
+ ):
+ """ Evaluate a given dataset using a given model at the end of every epoch during training.
+
+ # Arguments
+ generator : The generator that represents the dataset to evaluate.
+ iou_threshold : The threshold used to consider when a detection is positive or negative.
+ score_threshold : The score confidence threshold to use for detections.
+ max_detections : The maximum number of detections to use per image.
+ save_path : The path to save images with visualized detections to.
+ tensorboard : Instance of keras.callbacks.TensorBoard used to log the mAP value.
+ weighted_average : Compute the mAP using the weighted average of precisions among classes.
+ verbose : Set the verbosity level, by default this is set to 1.
+ """
+ self.generator = generator
+ self.iou_threshold = iou_threshold
+ self.score_threshold = score_threshold
+ self.max_detections = max_detections
+ self.save_path = save_path
+ self.tensorboard = tensorboard
+ self.weighted_average = weighted_average
+ self.verbose = verbose
+
+ super(Evaluate, self).__init__()
+
+ def on_epoch_end(self, epoch, logs=None):
+ logs = logs or {}
+
+ # run evaluation
+ average_precisions, _ = evaluate(
+ self.generator,
+ self.model,
+ iou_threshold=self.iou_threshold,
+ score_threshold=self.score_threshold,
+ max_detections=self.max_detections,
+ save_path=self.save_path
+ )
+
+ # compute per class average precision
+ total_instances = []
+ precisions = []
+ for label, (average_precision, num_annotations) in average_precisions.items():
+ if self.verbose == 1:
+ print('{:.0f} instances of class'.format(num_annotations),
+ self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
+ total_instances.append(num_annotations)
+ precisions.append(average_precision)
+ if self.weighted_average:
+ self.mean_ap = sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)
+ else:
+ self.mean_ap = sum(precisions) / sum(x > 0 for x in total_instances)
+
+ if self.tensorboard:
+ import tensorflow as tf
+ writer = tf.summary.create_file_writer(self.tensorboard.log_dir)
+ with writer.as_default():
+ tf.summary.scalar("mAP", self.mean_ap, step=epoch)
+ if self.verbose == 1:
+ for label, (average_precision, num_annotations) in average_precisions.items():
+ tf.summary.scalar("AP_" + self.generator.label_to_name(label), average_precision, step=epoch)
+ writer.flush()
+
+ logs['mAP'] = self.mean_ap
+
+ if self.verbose == 1:
+ print('mAP: {:.4f}'.format(self.mean_ap))
diff --git a/imageai/Detection/keras_retinanet/initializers.py b/imageai_tf_deprecated/Detection/keras_retinanet/initializers.py
similarity index 78%
rename from imageai/Detection/keras_retinanet/initializers.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/initializers.py
index d993a7cf..be8124ab 100644
--- a/imageai/Detection/keras_retinanet/initializers.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/initializers.py
@@ -14,15 +14,13 @@
limitations under the License.
"""
-import keras
+from tensorflow import keras
-import numpy as np
import math
class PriorProbability(keras.initializers.Initializer):
- """
- Initializer applies a prior probability.
+ """ Apply a prior probability to the weights.
"""
def __init__(self, probability=0.01):
@@ -34,7 +32,7 @@ def get_config(self):
}
def __call__(self, shape, dtype=None):
- # set bias to -log((1 - p)/p) for foregound
- result = np.ones(shape, dtype=dtype) * -math.log((1 - self.probability) / self.probability)
+ # set bias to -log((1 - p)/p) for foreground
+ result = keras.backend.ones(shape, dtype=dtype) * -math.log((1 - self.probability) / self.probability)
return result
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/layers/__init__.py b/imageai_tf_deprecated/Detection/keras_retinanet/layers/__init__.py
new file mode 100644
index 00000000..5a8c7d32
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/layers/__init__.py
@@ -0,0 +1,2 @@
+from ._misc import RegressBoxes, UpsampleLike, Anchors, ClipBoxes # noqa: F401
+from .filter_detections import FilterDetections # noqa: F401
diff --git a/imageai/Detection/keras_retinanet/layers/_misc.py b/imageai_tf_deprecated/Detection/keras_retinanet/layers/_misc.py
similarity index 51%
rename from imageai/Detection/keras_retinanet/layers/_misc.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/layers/_misc.py
index 9a2e541d..d0861213 100644
--- a/imageai/Detection/keras_retinanet/layers/_misc.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/layers/_misc.py
@@ -14,7 +14,8 @@
limitations under the License.
"""
-import keras
+import tensorflow
+from tensorflow import keras
from .. import backend
from ..utils import anchors as utils_anchors
@@ -22,43 +23,61 @@
class Anchors(keras.layers.Layer):
+ """ Keras layer for generating anchors for a given shape.
+ """
+
def __init__(self, size, stride, ratios=None, scales=None, *args, **kwargs):
+ """ Initializer for an Anchors layer.
+
+ Args
+ size: The base size of the anchors to generate.
+ stride: The stride of the anchors to generate.
+ ratios: The ratios of the anchors to generate (defaults to AnchorParameters.default.ratios).
+ scales: The scales of the anchors to generate (defaults to AnchorParameters.default.scales).
+ """
self.size = size
self.stride = stride
self.ratios = ratios
self.scales = scales
if ratios is None:
- self.ratios = np.array([0.5, 1, 2], keras.backend.floatx()),
+ self.ratios = utils_anchors.AnchorParameters.default.ratios
elif isinstance(ratios, list):
self.ratios = np.array(ratios)
if scales is None:
- self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)], keras.backend.floatx()),
+ self.scales = utils_anchors.AnchorParameters.default.scales
elif isinstance(scales, list):
self.scales = np.array(scales)
- self.num_anchors = len(ratios) * len(scales)
- self.anchors = keras.backend.variable(utils_anchors.generate_anchors(
- base_size=size,
- ratios=ratios,
- scales=scales,
- ))
+ self.num_anchors = len(self.ratios) * len(self.scales)
+ self.anchors = utils_anchors.generate_anchors(
+ base_size=self.size,
+ ratios=self.ratios,
+ scales=self.scales,
+ ).astype(np.float32)
super(Anchors, self).__init__(*args, **kwargs)
def call(self, inputs, **kwargs):
features = inputs
- features_shape = keras.backend.shape(features)[:3]
+ features_shape = keras.backend.shape(features)
# generate proposals from bbox deltas and shifted anchors
- anchors = backend.shift(features_shape[1:3], self.stride, self.anchors)
+ if keras.backend.image_data_format() == 'channels_first':
+ anchors = backend.shift(features_shape[2:4], self.stride, self.anchors)
+ else:
+ anchors = backend.shift(features_shape[1:3], self.stride, self.anchors)
anchors = keras.backend.tile(keras.backend.expand_dims(anchors, axis=0), (features_shape[0], 1, 1))
return anchors
def compute_output_shape(self, input_shape):
if None not in input_shape[1:]:
- total = np.prod(input_shape[1:3]) * self.num_anchors
+ if keras.backend.image_data_format() == 'channels_first':
+ total = np.prod(input_shape[2:4]) * self.num_anchors
+ else:
+ total = np.prod(input_shape[1:3]) * self.num_anchors
+
return (input_shape[0], total, 4)
else:
return (input_shape[0], None, 4)
@@ -75,82 +94,43 @@ def get_config(self):
return config
-class NonMaximumSuppression(keras.layers.Layer):
- def __init__(self, nms_threshold=0.5, score_threshold=0.05, max_boxes=300, *args, **kwargs):
- self.nms_threshold = nms_threshold
- self.score_threshold = score_threshold
- self.max_boxes = max_boxes
- super(NonMaximumSuppression, self).__init__(*args, **kwargs)
-
- def call(self, inputs, **kwargs):
- # TODO: support batch size > 1.
- boxes = inputs[0][0]
- classification = inputs[1][0]
- other = [i[0] for i in inputs[2:]] # can be any user-specified additional data
- indices = backend.range(keras.backend.shape(classification)[0])
- selected_scores = []
-
- # perform per class NMS
- for c in range(int(classification.shape[1])):
- scores = classification[:, c]
-
- # threshold based on score
- score_indices = backend.where(keras.backend.greater(scores, self.score_threshold))
- score_indices = keras.backend.cast(score_indices, 'int32')
- boxes_ = backend.gather_nd(boxes, score_indices)
- scores = keras.backend.gather(scores, score_indices)[:, 0]
-
- # perform NMS
- nms_indices = backend.non_max_suppression(boxes_, scores, max_output_size=self.max_boxes, iou_threshold=self.nms_threshold)
-
- # filter set of original indices
- selected_indices = keras.backend.gather(score_indices, nms_indices)
-
- # mask original classification column, setting all suppressed values to 0
- scores = keras.backend.gather(scores, nms_indices)
- scores = backend.scatter_nd(selected_indices, scores, keras.backend.shape(classification[:, c]))
- scores = keras.backend.expand_dims(scores, axis=1)
-
- selected_scores.append(scores)
-
- # reconstruct the (suppressed) classification scores
- classification = keras.backend.concatenate(selected_scores, axis=1)
-
- # reconstruct into the expected output
- detections = keras.backend.concatenate([boxes, classification] + other, axis=1)
-
- return keras.backend.expand_dims(detections, axis=0)
-
- def compute_output_shape(self, input_shape):
- return (input_shape[0][0], input_shape[0][1], sum([i[2] for i in input_shape]))
-
- def get_config(self):
- config = super(NonMaximumSuppression, self).get_config()
- config.update({
- 'nms_threshold' : self.nms_threshold,
- 'score_threshold' : self.score_threshold,
- 'max_boxes' : self.max_boxes,
- })
-
- return config
-
-
class UpsampleLike(keras.layers.Layer):
+ """ Keras layer for upsampling a Tensor to be the same shape as another Tensor.
+ """
+
def call(self, inputs, **kwargs):
source, target = inputs
target_shape = keras.backend.shape(target)
- return backend.resize_images(source, (target_shape[1], target_shape[2]))
+ if keras.backend.image_data_format() == 'channels_first':
+ source = tensorflow.transpose(source, (0, 2, 3, 1))
+ output = backend.resize_images(source, (target_shape[2], target_shape[3]), method='nearest')
+ output = tensorflow.transpose(output, (0, 3, 1, 2))
+ return output
+ else:
+ return backend.resize_images(source, (target_shape[1], target_shape[2]), method='nearest')
def compute_output_shape(self, input_shape):
- return (input_shape[0][0],) + input_shape[1][1:3] + (input_shape[0][-1],)
+ if keras.backend.image_data_format() == 'channels_first':
+ return (input_shape[0][0], input_shape[0][1]) + input_shape[1][2:4]
+ else:
+ return (input_shape[0][0],) + input_shape[1][1:3] + (input_shape[0][-1],)
class RegressBoxes(keras.layers.Layer):
+ """ Keras layer for applying regression values to boxes.
+ """
+
def __init__(self, mean=None, std=None, *args, **kwargs):
+ """ Initializer for the RegressBoxes layer.
+
+ Args
+ mean: The mean value of the regression values which was used for normalization.
+ std: The standard value of the regression values which was used for normalization.
+ """
if mean is None:
mean = np.array([0, 0, 0, 0])
if std is None:
- std = np.array([0.1, 0.1, 0.2, 0.2])
+ std = np.array([0.2, 0.2, 0.2, 0.2])
if isinstance(mean, (list, tuple)):
mean = np.array(mean)
@@ -181,3 +161,26 @@ def get_config(self):
})
return config
+
+
+class ClipBoxes(keras.layers.Layer):
+ """ Keras layer to clip box values to lie inside a given shape.
+ """
+ def call(self, inputs, **kwargs):
+ image, boxes = inputs
+ shape = keras.backend.cast(keras.backend.shape(image), keras.backend.floatx())
+ if keras.backend.image_data_format() == 'channels_first':
+ _, _, height, width = tensorflow.unstack(shape, axis=0)
+ else:
+ _, height, width, _ = tensorflow.unstack(shape, axis=0)
+
+ x1, y1, x2, y2 = tensorflow.unstack(boxes, axis=-1)
+ x1 = tensorflow.clip_by_value(x1, 0, width - 1)
+ y1 = tensorflow.clip_by_value(y1, 0, height - 1)
+ x2 = tensorflow.clip_by_value(x2, 0, width - 1)
+ y2 = tensorflow.clip_by_value(y2, 0, height - 1)
+
+ return keras.backend.stack([x1, y1, x2, y2], axis=2)
+
+ def compute_output_shape(self, input_shape):
+ return input_shape[1]
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/layers/filter_detections.py b/imageai_tf_deprecated/Detection/keras_retinanet/layers/filter_detections.py
new file mode 100644
index 00000000..1da7bf46
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/layers/filter_detections.py
@@ -0,0 +1,228 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import tensorflow
+from tensorflow import keras
+from .. import backend
+
+
def filter_detections(
    boxes,
    classification,
    other                 = None,
    class_specific_filter = True,
    nms                   = True,
    score_threshold       = 0.05,
    max_detections        = 300,
    nms_threshold         = 0.5
):
    """ Filter detections using the boxes and classification values.

    Args
        boxes                 : Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format.
        classification        : Tensor of shape (num_boxes, num_classes) containing the classification scores.
        other                 : Optional list of tensors of shape (num_boxes, ...) to filter along with the boxes and classification scores. Defaults to no extra tensors.
        class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
        nms                   : Flag to enable/disable non maximum suppression.
        score_threshold       : Threshold used to prefilter the boxes with.
        max_detections        : Maximum number of detections to keep.
        nms_threshold         : Threshold for the IoU value to determine when a box should be suppressed.

    Returns
        A list of [boxes, scores, labels, other[0], other[1], ...].
        boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes.
        scores is shaped (max_detections,) and contains the scores of the predicted class.
        labels is shaped (max_detections,) and contains the predicted label.
        other[i] is shaped (max_detections, ...) and contains the filtered other[i] data.
        In case there are less than max_detections detections, the tensors are padded with -1's.
    """
    # fixed: avoid the mutable default argument ([]), which Python shares
    # across all calls of the function
    if other is None:
        other = []

    def _filter_detections(scores, labels):
        # threshold based on score
        indices = tensorflow.where(keras.backend.greater(scores, score_threshold))

        if nms:
            filtered_boxes  = tensorflow.gather_nd(boxes, indices)
            filtered_scores = keras.backend.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tensorflow.image.non_max_suppression(filtered_boxes, filtered_scores, max_output_size=max_detections, iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = keras.backend.gather(indices, nms_indices)

        # pair each surviving box index with its label
        labels  = tensorflow.gather_nd(labels, indices)
        indices = keras.backend.stack([indices[:, 0], labels], axis=1)

        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(int(classification.shape[1])):
            scores = classification[:, c]
            labels = c * tensorflow.ones((keras.backend.shape(scores)[0],), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        indices = keras.backend.concatenate(all_indices, axis=0)
    else:
        scores  = keras.backend.max(classification, axis=1)
        labels  = keras.backend.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores              = tensorflow.gather_nd(classification, indices)
    labels              = indices[:, 1]
    scores, top_indices = tensorflow.nn.top_k(scores, k=keras.backend.minimum(max_detections, keras.backend.shape(scores)[0]))

    # filter input using the final set of indices
    indices = keras.backend.gather(indices[:, 0], top_indices)
    boxes   = keras.backend.gather(boxes, indices)
    labels  = keras.backend.gather(labels, top_indices)
    other_  = [keras.backend.gather(o, indices) for o in other]

    # pad the outputs with -1 so every call yields exactly max_detections rows
    pad_size = keras.backend.maximum(0, max_detections - keras.backend.shape(scores)[0])
    boxes    = tensorflow.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores   = tensorflow.pad(scores, [[0, pad_size]], constant_values=-1)
    labels   = tensorflow.pad(labels, [[0, pad_size]], constant_values=-1)
    labels   = keras.backend.cast(labels, 'int32')
    other_   = [tensorflow.pad(o, [[0, pad_size]] + [[0, 0] for _ in range(1, len(o.shape))], constant_values=-1) for o in other_]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    for o, s in zip(other_, [list(keras.backend.int_shape(o)) for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_
+
+
class FilterDetections(keras.layers.Layer):
    """ Keras layer for filtering detections using score threshold and NMS.
    """

    def __init__(
        self,
        nms                   = True,
        class_specific_filter = True,
        nms_threshold         = 0.5,
        score_threshold       = 0.05,
        max_detections        = 300,
        parallel_iterations   = 32,
        **kwargs
    ):
        """ Filters detections using score threshold, NMS and selecting the top-k detections.

        Args
            nms                   : Flag to enable/disable NMS.
            class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
            nms_threshold         : Threshold for the IoU value to determine when a box should be suppressed.
            score_threshold       : Threshold used to prefilter the boxes with.
            max_detections        : Maximum number of detections to keep.
            parallel_iterations   : Number of batch items to process in parallel.
        """
        self.nms                   = nms
        self.class_specific_filter = class_specific_filter
        self.nms_threshold         = nms_threshold
        self.score_threshold       = score_threshold
        self.max_detections        = max_detections
        self.parallel_iterations   = parallel_iterations
        super(FilterDetections, self).__init__(**kwargs)

    def call(self, inputs, **kwargs):
        """ Constructs the NMS graph.

        Args
            inputs : List of [boxes, classification, other[0], other[1], ...] tensors.
        """
        boxes          = inputs[0]
        classification = inputs[1]
        other          = inputs[2:]

        # wrap nms with our parameters
        def _filter_detections(args):
            boxes          = args[0]
            classification = args[1]
            other          = args[2]

            return filter_detections(
                boxes,
                classification,
                other,
                nms                   = self.nms,
                class_specific_filter = self.class_specific_filter,
                score_threshold       = self.score_threshold,
                max_detections        = self.max_detections,
                nms_threshold         = self.nms_threshold,
            )

        # call filter_detections on each batch item; dtypes/shapes must match
        # the [boxes, scores, labels, other...] outputs of filter_detections
        dtypes = [keras.backend.floatx(), keras.backend.floatx(), 'int32'] + [o.dtype for o in other]
        shapes = [(self.max_detections, 4), (self.max_detections,), (self.max_detections,)]
        shapes.extend([(self.max_detections,) + o.shape[2:] for o in other])
        outputs = backend.map_fn(
            _filter_detections,
            elems=[boxes, classification, other],
            dtype=dtypes,
            shapes=shapes,
            parallel_iterations=self.parallel_iterations,
        )

        return outputs

    def compute_output_shape(self, input_shape):
        """ Computes the output shapes given the input shapes.

        Args
            input_shape : List of input shapes [boxes, classification, other[0], other[1], ...].

        Returns
            List of tuples representing the output shapes:
            [filtered_boxes.shape, filtered_scores.shape, filtered_labels.shape, filtered_other[0].shape, filtered_other[1].shape, ...]
        """
        return [
            (input_shape[0][0], self.max_detections, 4),
            (input_shape[1][0], self.max_detections),
            (input_shape[1][0], self.max_detections),
        ] + [
            tuple([input_shape[i][0], self.max_detections] + list(input_shape[i][2:])) for i in range(2, len(input_shape))
        ]

    def compute_mask(self, inputs, mask=None):
        """ This is required in Keras when there is more than 1 output.
        """
        # one mask entry per output: boxes, scores, labels plus each 'other'
        return (len(inputs) + 1) * [None]

    def get_config(self):
        """ Gets the configuration of this layer.

        Returns
            Dictionary containing the parameters of this layer.
        """
        config = super(FilterDetections, self).get_config()
        config.update({
            'nms'                   : self.nms,
            'class_specific_filter' : self.class_specific_filter,
            'nms_threshold'         : self.nms_threshold,
            'score_threshold'       : self.score_threshold,
            'max_detections'        : self.max_detections,
            'parallel_iterations'   : self.parallel_iterations,
        })

        return config
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/losses.py b/imageai_tf_deprecated/Detection/keras_retinanet/losses.py
new file mode 100644
index 00000000..15517884
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/losses.py
@@ -0,0 +1,118 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import tensorflow
+from tensorflow import keras
+
+
def focal(alpha=0.25, gamma=2.0, cutoff=0.5):
    """ Build a functor computing the focal loss (https://arxiv.org/abs/1708.02002).

    Args
        alpha: Scale applied to the focal weight.
        gamma: Exponent applied to the focal weight.
        cutoff: Positive prediction cutoff for soft targets.

    Returns
        A loss function mapping (y_true, y_pred) to the focal loss.
    """
    def _focal(y_true, y_pred):
        """ Compute the focal loss of y_pred w.r.t. y_true.

        Args
            y_true: (B, N, num_classes + 1) target tensor; the last channel holds
                the anchor state (-1 = ignore, 0 = background, 1 = object).
            y_pred: (B, N, num_classes) predicted scores.

        Returns
            The focal loss of y_pred w.r.t. y_true.
        """
        # split the class targets from the per-anchor state channel
        targets     = y_true[:, :, :-1]
        states      = y_true[:, :, -1]
        predictions = y_pred

        # drop anchors marked "ignore"
        keep        = tensorflow.where(keras.backend.not_equal(states, -1))
        targets     = tensorflow.gather_nd(targets, keep)
        predictions = tensorflow.gather_nd(predictions, keep)

        # focal weighting: alpha for positives, (1 - alpha) for negatives
        alpha_factor = keras.backend.ones_like(targets) * alpha
        alpha_factor = tensorflow.where(keras.backend.greater(targets, cutoff), alpha_factor, 1 - alpha_factor)
        focal_weight = tensorflow.where(keras.backend.greater(targets, cutoff), 1 - predictions, predictions)
        focal_weight = alpha_factor * focal_weight ** gamma

        cls_loss = focal_weight * keras.backend.binary_crossentropy(targets, predictions)

        # normalize by the number of positive anchors, never dividing by zero
        positives  = tensorflow.where(keras.backend.equal(states, 1))
        normalizer = keras.backend.cast(keras.backend.shape(positives)[0], keras.backend.floatx())
        normalizer = keras.backend.maximum(keras.backend.cast_to_floatx(1.0), normalizer)

        return keras.backend.sum(cls_loss) / normalizer

    return _focal
+
+
def smooth_l1(sigma=3.0):
    """ Create a smooth L1 loss functor.

    Args
        sigma: This argument defines the point where the loss changes from L2 to L1.

    Returns
        A functor for computing the smooth L1 loss given target data and predicted data.
    """
    sigma_squared = sigma ** 2

    def _smooth_l1(y_true, y_pred):
        """ Compute the smooth L1 loss of y_pred w.r.t. y_true.

        Args
            y_true: Tensor from the generator of shape (B, N, 5). The last value for each box is the state of the anchor (ignore, negative, positive).
            y_pred: Tensor from the network of shape (B, N, 4).

        Returns
            The smooth L1 loss of y_pred w.r.t. y_true.
        """
        # split the regression targets from the per-anchor state channel
        predicted = y_pred
        targets   = y_true[:, :, :-1]
        states    = y_true[:, :, -1]

        # keep only anchors marked positive (state == 1)
        positive  = tensorflow.where(keras.backend.equal(states, 1))
        predicted = tensorflow.gather_nd(predicted, positive)
        targets   = tensorflow.gather_nd(targets, positive)

        # piecewise loss:
        #   f(x) = 0.5 * (sigma * x)^2   if |x| < 1 / sigma^2
        #          |x| - 0.5 / sigma^2   otherwise
        diff = keras.backend.abs(predicted - targets)
        loss = tensorflow.where(
            keras.backend.less(diff, 1.0 / sigma_squared),
            0.5 * sigma_squared * keras.backend.pow(diff, 2),
            diff - 0.5 / sigma_squared
        )

        # normalize by the number of positive anchors, never dividing by zero
        denom = keras.backend.maximum(1, keras.backend.shape(positive)[0])
        denom = keras.backend.cast(denom, dtype=keras.backend.floatx())
        return keras.backend.sum(loss) / denom

    return _smooth_l1
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/models/__init__.py b/imageai_tf_deprecated/Detection/keras_retinanet/models/__init__.py
new file mode 100644
index 00000000..e9b81d10
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/models/__init__.py
@@ -0,0 +1,125 @@
+from __future__ import print_function
+import sys
+
+
class Backbone(object):
    """ This class stores additional information on backbones.
    """
    def __init__(self, backbone):
        # a dictionary mapping custom layer names to the correct classes
        # NOTE: keys must match the names keras records when saving a model,
        # so keras.models.load_model can resolve these custom objects.
        from .. import layers
        from .. import losses
        from .. import initializers
        self.custom_objects = {
            'UpsampleLike'     : layers.UpsampleLike,
            'PriorProbability' : initializers.PriorProbability,
            'RegressBoxes'     : layers.RegressBoxes,
            'FilterDetections' : layers.FilterDetections,
            'Anchors'          : layers.Anchors,
            'ClipBoxes'        : layers.ClipBoxes,
            '_smooth_l1'       : losses.smooth_l1(),
            '_focal'           : losses.focal(),
        }

        self.backbone = backbone
        # subclasses raise ValueError here when the backbone string is invalid
        self.validate()

    def retinanet(self, *args, **kwargs):
        """ Returns a retinanet model using the correct backbone.
        """
        raise NotImplementedError('retinanet method not implemented.')

    def download_imagenet(self):
        """ Downloads ImageNet weights and returns path to weights file.
        """
        raise NotImplementedError('download_imagenet method not implemented.')

    def validate(self):
        """ Checks whether the backbone string is correct.
        """
        raise NotImplementedError('validate method not implemented.')

    def preprocess_image(self, inputs):
        """ Takes as input an image and prepares it for being passed through the network.
        Having this function in Backbone allows other backbones to define a specific preprocessing step.
        """
        raise NotImplementedError('preprocess_image method not implemented.')
+
+
def backbone(backbone_name):
    """ Returns a backbone object for the given backbone.

    Args
        backbone_name: Name of the backbone, e.g. 'resnet50' or 'densenet121'.

    Returns
        An instance of the Backbone subclass matching `backbone_name`.

    Raises
        NotImplementedError: if no backbone class matches `backbone_name`.
    """
    if 'densenet' in backbone_name:
        from .densenet import DenseNetBackbone as b
    elif 'seresnext' in backbone_name or 'seresnet' in backbone_name or 'senet' in backbone_name:
        from .senet import SeBackbone as b
    elif 'resnet' in backbone_name:
        from .resnet import ResNetBackbone as b
    elif 'mobilenet' in backbone_name:
        from .mobilenet import MobileNetBackbone as b
    elif 'EfficientNet' in backbone_name:
        from .effnet import EfficientNetBackbone as b
    else:
        # fixed: previously formatted this function object (`backbone`) into
        # the message instead of the offending backbone name
        raise NotImplementedError('Backbone class for \'{}\' not implemented.'.format(backbone_name))

    return b(backbone_name)
+
+
def load_model(filepath, backbone_name='resnet50'):
    """ Loads a retinanet model, registering the custom objects for the given backbone.

    Args
        filepath: one of the following:
            - string, path to the saved model, or
            - h5py.File object from which to load the model
        backbone_name : Backbone with which the model was trained.

    Returns
        A keras.models.Model object.

    Raises
        ImportError: if h5py is not available.
        ValueError: In case of an invalid savefile.
    """
    from tensorflow import keras

    custom_objects = backbone(backbone_name).custom_objects
    return keras.models.load_model(filepath, custom_objects=custom_objects)
+
+
def convert_model(model, nms=True, class_specific_filter=True, anchor_params=None, **kwargs):
    """ Converts a training model to an inference model.

    Args
        model                 : A retinanet training model.
        nms                   : Boolean, whether to add NMS filtering to the converted model.
        class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only.
        anchor_params         : Anchor parameters object. If omitted, default values are used.
        **kwargs              : Inference and minimal retinanet model settings.

    Returns
        A keras.models.Model object.

    Raises
        ImportError: if h5py is not available.
        ValueError: In case of an invalid savefile.
    """
    from .retinanet import retinanet_bbox

    return retinanet_bbox(
        model                 = model,
        nms                   = nms,
        class_specific_filter = class_specific_filter,
        anchor_params         = anchor_params,
        **kwargs
    )
+
+
def assert_training_model(model):
    """ Assert that the model is a training model.

    Raises an AssertionError (naming the model's actual outputs) when either
    the 'regression' or the 'classification' output is missing.
    """
    required = ('regression', 'classification')
    has_all  = all(name in model.output_names for name in required)
    assert has_all, \
        "Input is not a training model (no 'regression' and 'classification' outputs were found, outputs are: {}).".format(model.output_names)
+
+
def check_training_model(model):
    """ Check that the model is a training model; print the reason and exit otherwise.
    """
    try:
        assert_training_model(model)
    except AssertionError as error:
        # report to stderr and abort with a non-zero exit code
        print(error, file=sys.stderr)
        sys.exit(1)
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/models/densenet.py b/imageai_tf_deprecated/Detection/keras_retinanet/models/densenet.py
new file mode 100644
index 00000000..b5a646f5
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/models/densenet.py
@@ -0,0 +1,111 @@
+"""
+Copyright 2018 vidosits (https://github.com/vidosits/)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from tensorflow import keras
+
+from . import retinanet
+from . import Backbone
+from ..utils.image import preprocess_image
+
+
# backbone name -> ([layers per dense block], keras constructor)
allowed_backbones = {
    'densenet121': ([6, 12, 24, 16], keras.applications.densenet.DenseNet121),
    'densenet169': ([6, 12, 32, 32], keras.applications.densenet.DenseNet169),
    'densenet201': ([6, 12, 48, 32], keras.applications.densenet.DenseNet201),
}
+
+
class DenseNetBackbone(Backbone):
    """ Describes backbone information and provides utility functions.
    """

    def retinanet(self, *args, **kwargs):
        """ Returns a retinanet model using the correct backbone.
        """
        return densenet_retinanet(*args, backbone=self.backbone, **kwargs)

    def download_imagenet(self):
        """ Download pre-trained weights for the specified backbone name.
        This name is in the format {backbone}_weights_tf_dim_ordering_tf_kernels_notop
        where backbone is the densenet + number of layers (e.g. densenet121).
        For more info check the explanation from the keras densenet script itself:
        https://github.com/keras-team/keras/blob/master/keras/applications/densenet.py
        """
        # only 'channels_last' weights are published
        if keras.backend.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format are not available.')

        base_url  = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/'
        file_name = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(self.backbone)

        return keras.utils.get_file(file_name, base_url + file_name, cache_subdir='models')

    def validate(self):
        """ Checks whether the backbone string is correct.
        """
        # the part before any '_' suffix must name a known densenet variant
        name = self.backbone.split('_')[0]
        if name not in allowed_backbones:
            raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(name, allowed_backbones.keys()))

    def preprocess_image(self, inputs):
        """ Takes as input an image and prepares it for being passed through the network.
        """
        return preprocess_image(inputs, mode='tf')
+
+
def densenet_retinanet(num_classes, backbone='densenet121', inputs=None, modifier=None, **kwargs):
    """ Constructs a retinanet model using a densenet backbone.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use (one of ('densenet121', 'densenet169', 'densenet201')).
        inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
        modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).

    Returns
        RetinaNet model with a DenseNet backbone.
    """
    # choose default input
    if inputs is None:
        inputs = keras.layers.Input((None, None, 3))

    blocks, creator = allowed_backbones[backbone]
    model = creator(input_tensor=inputs, include_top=False, pooling=None, weights=None)

    # get last conv layer from the end of each dense block
    layer_outputs = [model.get_layer(name='conv{}_block{}_concat'.format(idx + 2, block_num)).output for idx, block_num in enumerate(blocks)]

    # create the densenet backbone
    # layer_outputs contains 4 layers
    model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)

    # invoke modifier if given
    if modifier:
        model = modifier(model)

    # create the full model
    # the four dense-block outputs feed the FPN as C2..C5
    backbone_layers = {
        'C2': model.outputs[0],
        'C3': model.outputs[1],
        'C4': model.outputs[2],
        'C5': model.outputs[3]
    }

    model = retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)

    return model
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/models/effnet.py b/imageai_tf_deprecated/Detection/keras_retinanet/models/effnet.py
new file mode 100644
index 00000000..77b82600
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/models/effnet.py
@@ -0,0 +1,159 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from tensorflow import keras
+
+from . import retinanet
+from . import Backbone
+import efficientnet.keras as efn
+
+
class EfficientNetBackbone(Backbone):
    """ Describes backbone information and provides utility functions.
    """

    def __init__(self, backbone):
        super(EfficientNetBackbone, self).__init__(backbone)
        # placeholder for a custom preprocessing function (unused by default)
        self.preprocess_image_func = None

    def retinanet(self, *args, **kwargs):
        """ Returns a retinanet model using the correct backbone.
        """
        return effnet_retinanet(*args, backbone=self.backbone, **kwargs)

    def download_imagenet(self):
        """ Downloads ImageNet weights and returns path to weights file.
        """
        from efficientnet.weights import IMAGENET_WEIGHTS_PATH
        from efficientnet.weights import IMAGENET_WEIGHTS_HASHES

        # e.g. 'EfficientNetB3' -> 'efficientnet-b3'
        model_name = 'efficientnet-b' + self.backbone[-1]
        file_name  = model_name + '_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'
        file_hash  = IMAGENET_WEIGHTS_HASHES[model_name][1]
        return keras.utils.get_file(
            file_name,
            IMAGENET_WEIGHTS_PATH + file_name,
            cache_subdir='models',
            file_hash=file_hash,
        )

    def validate(self):
        """ Checks whether the backbone string is correct.
        """
        allowed_backbones = ['EfficientNetB0', 'EfficientNetB1', 'EfficientNetB2', 'EfficientNetB3', 'EfficientNetB4',
                             'EfficientNetB5', 'EfficientNetB6', 'EfficientNetB7']
        name = self.backbone.split('_')[0]

        if name not in allowed_backbones:
            raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(name, allowed_backbones))

    def preprocess_image(self, inputs):
        """ Takes as input an image and prepares it for being passed through the network.
        """
        return efn.preprocess_input(inputs)
+
+
def effnet_retinanet(num_classes, backbone='EfficientNetB0', inputs=None, modifier=None, **kwargs):
    """ Constructs a retinanet model using an EfficientNet backbone.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use (one of 'EfficientNetB0' ... 'EfficientNetB7').
        inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
        modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).

    Returns
        RetinaNet model with an EfficientNet backbone.

    Raises
        ValueError: if `backbone` is not a recognised EfficientNet variant.
    """
    # choose default input
    if inputs is None:
        if keras.backend.image_data_format() == 'channels_first':
            inputs = keras.layers.Input(shape=(3, None, None))
        else:
            inputs = keras.layers.Input(shape=(None, None, 3))

    # dispatch on the backbone name instead of a long if/elif chain;
    # the allow-list preserves the original ValueError for unknown names
    valid_backbones = {'EfficientNetB{}'.format(i) for i in range(8)}
    if backbone not in valid_backbones:
        raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))
    model = getattr(efn, backbone)(input_tensor=inputs, include_top=False, weights=None)

    # last activation of each relevant stage [28x28, 14x14, 7x7]
    layer_names = ['block4a_expand_activation', 'block6a_expand_activation', 'top_activation']
    layer_outputs = [
        model.get_layer(name=layer_names[0]).output,  # 28x28
        model.get_layer(name=layer_names[1]).output,  # 14x14
        model.get_layer(name=layer_names[2]).output,  # 7x7
    ]
    # create the backbone model exposing only the pyramid inputs
    model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)

    # invoke modifier if given
    if modifier:
        model = modifier(model)

    # C2 not provided
    backbone_layers = {
        'C3': model.outputs[0],
        'C4': model.outputs[1],
        'C5': model.outputs[2]
    }

    # create the full model
    return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
+
+
# Convenience constructors: each fixes the `backbone` argument of
# effnet_retinanet to one EfficientNet variant.
def EfficientNetB0_retinanet(num_classes, inputs=None, **kwargs):
    return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB0', inputs=inputs, **kwargs)


def EfficientNetB1_retinanet(num_classes, inputs=None, **kwargs):
    return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB1', inputs=inputs, **kwargs)


def EfficientNetB2_retinanet(num_classes, inputs=None, **kwargs):
    return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB2', inputs=inputs, **kwargs)


def EfficientNetB3_retinanet(num_classes, inputs=None, **kwargs):
    return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB3', inputs=inputs, **kwargs)


def EfficientNetB4_retinanet(num_classes, inputs=None, **kwargs):
    return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB4', inputs=inputs, **kwargs)


def EfficientNetB5_retinanet(num_classes, inputs=None, **kwargs):
    return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB5', inputs=inputs, **kwargs)


def EfficientNetB6_retinanet(num_classes, inputs=None, **kwargs):
    return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB6', inputs=inputs, **kwargs)


def EfficientNetB7_retinanet(num_classes, inputs=None, **kwargs):
    return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB7', inputs=inputs, **kwargs)
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/models/mobilenet.py b/imageai_tf_deprecated/Detection/keras_retinanet/models/mobilenet.py
new file mode 100644
index 00000000..e381f25c
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/models/mobilenet.py
@@ -0,0 +1,114 @@
+"""
+Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from tensorflow import keras
+from ..utils.image import preprocess_image
+
+from . import retinanet
+from . import Backbone
+
+
class MobileNetBackbone(Backbone):
    """ Describes backbone information and provides utility functions.
    """

    # valid resolution prefixes; the full backbone string also carries the
    # width multiplier, e.g. 'mobilenet224_1.0'
    allowed_backbones = ['mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224']

    def retinanet(self, *args, **kwargs):
        """ Returns a retinanet model using the correct backbone.
        """
        return mobilenet_retinanet(*args, backbone=self.backbone, **kwargs)

    def download_imagenet(self):
        """ Download pre-trained weights for the specified backbone name.
        This name is in the format mobilenet{rows}_{alpha} where rows is the
        imagenet shape dimension and 'alpha' controls the width of the network.
        For more info check the explanation from the keras mobilenet script itself.

        Raises
            ValueError: if the image data format is 'channels_first', for which
                no pre-trained weights are published.
        """
        alpha = float(self.backbone.split('_')[1])
        rows  = int(self.backbone.split('_')[0].replace('mobilenet', ''))

        # load weights
        if keras.backend.image_data_format() == 'channels_first':
            # fixed: the message previously said "channels_last" although the
            # unsupported format checked here is "channels_first" (the sibling
            # densenet backbone already raises the correct message)
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')
        if alpha == 1.0:
            alpha_text = '1_0'
        elif alpha == 0.75:
            alpha_text = '7_5'
        elif alpha == 0.50:
            alpha_text = '5_0'
        else:
            alpha_text = '2_5'

        model_name   = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows)
        weights_url  = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.6/' + model_name
        weights_path = keras.utils.get_file(model_name, weights_url, cache_subdir='models')

        return weights_path

    def validate(self):
        """ Checks whether the backbone string is correct.
        """
        backbone = self.backbone.split('_')[0]

        if backbone not in MobileNetBackbone.allowed_backbones:
            raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, MobileNetBackbone.allowed_backbones))

    def preprocess_image(self, inputs):
        """ Takes as input an image and prepares it for being passed through the network.
        """
        return preprocess_image(inputs, mode='tf')
+
+
def mobilenet_retinanet(num_classes, backbone='mobilenet224_1.0', inputs=None, modifier=None, **kwargs):
    """ Constructs a retinanet model using a mobilenet backbone.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use (one of ('mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224')),
            suffixed with the width multiplier, e.g. 'mobilenet224_1.0'.
        inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
        modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).

    Returns
        RetinaNet model with a MobileNet backbone.
    """
    # the width multiplier is encoded after the '_' in the backbone string
    alpha = float(backbone.split('_')[1])

    # choose default input
    if inputs is None:
        inputs = keras.layers.Input((None, None, 3))

    backbone = keras.applications.mobilenet.MobileNet(input_tensor=inputs, alpha=alpha, include_top=False, pooling=None, weights=None)

    # create the full model
    # NOTE(review): these pointwise-conv activations presumably correspond to
    # the C3/C4/C5 feature maps — confirm against the MobileNet layer list.
    layer_names = ['conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu']
    layer_outputs = [backbone.get_layer(name).output for name in layer_names]
    backbone = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=backbone.name)

    # invoke modifier if given
    if modifier:
        backbone = modifier(backbone)

    # C2 not provided
    backbone_layers = {
        'C3': backbone.outputs[0],
        'C4': backbone.outputs[1],
        'C5': backbone.outputs[2]
    }

    return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/models/resnet.py b/imageai_tf_deprecated/Detection/keras_retinanet/models/resnet.py
new file mode 100644
index 00000000..f5acfd0a
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/models/resnet.py
@@ -0,0 +1,131 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from tensorflow import keras
+import keras_resnet
+import keras_resnet.models
+
+from . import retinanet
+from . import Backbone
+from ..utils.image import preprocess_image
+
+
+class ResNetBackbone(Backbone):
+ """ Describes backbone information and provides utility functions.
+ """
+
+ def __init__(self, backbone):
+ super(ResNetBackbone, self).__init__(backbone)
+ self.custom_objects.update(keras_resnet.custom_objects)
+
+ def retinanet(self, *args, **kwargs):
+ """ Returns a retinanet model using the correct backbone.
+ """
+ return resnet_retinanet(*args, backbone=self.backbone, **kwargs)
+
+ def download_imagenet(self):
+ """ Downloads ImageNet weights and returns path to weights file.
+ """
+ resnet_filename = 'ResNet-{}-model.keras.h5'
+ resnet_resource = 'https://github.com/fizyr/keras-models/releases/download/v0.0.1/{}'.format(resnet_filename)
+ depth = int(self.backbone.replace('resnet', ''))
+
+ filename = resnet_filename.format(depth)
+ resource = resnet_resource.format(depth)
+ if depth == 50:
+ checksum = '3e9f4e4f77bbe2c9bec13b53ee1c2319'
+ elif depth == 101:
+ checksum = '05dc86924389e5b401a9ea0348a3213c'
+ elif depth == 152:
+ checksum = '6ee11ef2b135592f8031058820bb9e71'
+
+ return keras.utils.get_file(
+ filename,
+ resource,
+ cache_subdir='models',
+ md5_hash=checksum
+ )
+
+ def validate(self):
+ """ Checks whether the backbone string is correct.
+ """
+ allowed_backbones = ['resnet50', 'resnet101', 'resnet152']
+ backbone = self.backbone.split('_')[0]
+
+ if backbone not in allowed_backbones:
+ raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones))
+
+ def preprocess_image(self, inputs):
+ """ Takes as input an image and prepares it for being passed through the network.
+ """
+ return preprocess_image(inputs, mode='caffe')
+
+
+def resnet_retinanet(num_classes, backbone='resnet50', inputs=None, modifier=None, **kwargs):
+ """ Constructs a retinanet model using a resnet backbone.
+
+ Args
+ num_classes: Number of classes to predict.
+ backbone: Which backbone to use (one of ('resnet50', 'resnet101', 'resnet152')).
+ inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
+ modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
+
+ Returns
+ RetinaNet model with a ResNet backbone.
+ """
+ # choose default input
+ if inputs is None:
+ if keras.backend.image_data_format() == 'channels_first':
+ inputs = keras.layers.Input(shape=(3, None, None))
+ else:
+ inputs = keras.layers.Input(shape=(None, None, 3))
+
+ # create the resnet backbone
+ if backbone == 'resnet50':
+ resnet = keras_resnet.models.ResNet50(inputs, include_top=False, freeze_bn=True)
+ elif backbone == 'resnet101':
+ resnet = keras_resnet.models.ResNet101(inputs, include_top=False, freeze_bn=True)
+ elif backbone == 'resnet152':
+ resnet = keras_resnet.models.ResNet152(inputs, include_top=False, freeze_bn=True)
+ else:
+ raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))
+
+ # invoke modifier if given
+ if modifier:
+ resnet = modifier(resnet)
+
+ # create the full model
+ # resnet.outputs contains 4 layers
+ backbone_layers = {
+ 'C2': resnet.outputs[0],
+ 'C3': resnet.outputs[1],
+ 'C4': resnet.outputs[2],
+ 'C5': resnet.outputs[3]
+ }
+
+ return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
+
+
+def resnet50_retinanet(num_classes, inputs=None, **kwargs):
+ return resnet_retinanet(num_classes=num_classes, backbone='resnet50', inputs=inputs, **kwargs)
+
+
+def resnet101_retinanet(num_classes, inputs=None, **kwargs):
+ return resnet_retinanet(num_classes=num_classes, backbone='resnet101', inputs=inputs, **kwargs)
+
+
+def resnet152_retinanet(num_classes, inputs=None, **kwargs):
+ return resnet_retinanet(num_classes=num_classes, backbone='resnet152', inputs=inputs, **kwargs)
diff --git a/imageai/Detection/keras_retinanet/models/retinanet.py b/imageai_tf_deprecated/Detection/keras_retinanet/models/retinanet.py
similarity index 50%
rename from imageai/Detection/keras_retinanet/models/retinanet.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/models/retinanet.py
index cb97ed5a..9ca3e18c 100644
--- a/imageai/Detection/keras_retinanet/models/retinanet.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/models/retinanet.py
@@ -14,25 +14,11 @@
limitations under the License.
"""
-import keras
+from tensorflow import keras
from .. import initializers
from .. import layers
-from .. import losses
-
-import numpy as np
-
-"""
-A dictionary mapping custom layer names to the correct classes.
-"""
-custom_objects = {
- 'UpsampleLike' : layers.UpsampleLike,
- 'PriorProbability' : initializers.PriorProbability,
- 'RegressBoxes' : layers.RegressBoxes,
- 'NonMaximumSuppression' : layers.NonMaximumSuppression,
- 'Anchors' : layers.Anchors,
- '_smooth_l1' : losses.smooth_l1(),
- '_focal' : losses.focal(),
-}
+from ..utils.anchors import AnchorParameters
+from . import assert_training_model
def default_classification_model(
@@ -43,7 +29,7 @@ def default_classification_model(
classification_feature_size=256,
name='classification_submodel'
):
- """ Creates the default regression submodel.
+ """ Creates the default classification submodel.
Args
num_classes : Number of classes to predict a score for at each feature level.
@@ -61,37 +47,43 @@ def default_classification_model(
'padding' : 'same',
}
- inputs = keras.layers.Input(shape=(None, None, pyramid_feature_size))
+ if keras.backend.image_data_format() == 'channels_first':
+ inputs = keras.layers.Input(shape=(pyramid_feature_size, None, None))
+ else:
+ inputs = keras.layers.Input(shape=(None, None, pyramid_feature_size))
outputs = inputs
for i in range(4):
outputs = keras.layers.Conv2D(
filters=classification_feature_size,
activation='relu',
name='pyramid_classification_{}'.format(i),
- kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
+ kernel_initializer=keras.initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None),
bias_initializer='zeros',
**options
)(outputs)
outputs = keras.layers.Conv2D(
filters=num_classes * num_anchors,
- kernel_initializer=keras.initializers.zeros(),
+ kernel_initializer=keras.initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None),
bias_initializer=initializers.PriorProbability(probability=prior_probability),
name='pyramid_classification',
**options
)(outputs)
# reshape output and apply sigmoid
+ if keras.backend.image_data_format() == 'channels_first':
+ outputs = keras.layers.Permute((2, 3, 1), name='pyramid_classification_permute')(outputs)
outputs = keras.layers.Reshape((-1, num_classes), name='pyramid_classification_reshape')(outputs)
outputs = keras.layers.Activation('sigmoid', name='pyramid_classification_sigmoid')(outputs)
return keras.models.Model(inputs=inputs, outputs=outputs, name=name)
-def default_regression_model(num_anchors, pyramid_feature_size=256, regression_feature_size=256, name='regression_submodel'):
+def default_regression_model(num_values, num_anchors, pyramid_feature_size=256, regression_feature_size=256, name='regression_submodel'):
""" Creates the default regression submodel.
Args
+ num_values : Number of values to regress.
num_anchors : Number of anchors to regress for each feature level.
pyramid_feature_size : The number of filters to expect from the feature pyramid levels.
regression_feature_size : The number of filters to use in the layers in the regression submodel.
@@ -107,11 +99,14 @@ def default_regression_model(num_anchors, pyramid_feature_size=256, regression_f
'kernel_size' : 3,
'strides' : 1,
'padding' : 'same',
- 'kernel_initializer' : keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
+ 'kernel_initializer' : keras.initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None),
'bias_initializer' : 'zeros'
}
- inputs = keras.layers.Input(shape=(None, None, pyramid_feature_size))
+ if keras.backend.image_data_format() == 'channels_first':
+ inputs = keras.layers.Input(shape=(pyramid_feature_size, None, None))
+ else:
+ inputs = keras.layers.Input(shape=(None, None, pyramid_feature_size))
outputs = inputs
for i in range(4):
outputs = keras.layers.Conv2D(
@@ -121,94 +116,86 @@ def default_regression_model(num_anchors, pyramid_feature_size=256, regression_f
**options
)(outputs)
- outputs = keras.layers.Conv2D(num_anchors * 4, name='pyramid_regression', **options)(outputs)
- outputs = keras.layers.Reshape((-1, 4), name='pyramid_regression_reshape')(outputs)
+ outputs = keras.layers.Conv2D(num_anchors * num_values, name='pyramid_regression', **options)(outputs)
+ if keras.backend.image_data_format() == 'channels_first':
+ outputs = keras.layers.Permute((2, 3, 1), name='pyramid_regression_permute')(outputs)
+ outputs = keras.layers.Reshape((-1, num_values), name='pyramid_regression_reshape')(outputs)
return keras.models.Model(inputs=inputs, outputs=outputs, name=name)
-def __create_pyramid_features(C3, C4, C5, feature_size=256):
+def __create_pyramid_features(backbone_layers, pyramid_levels, feature_size=256):
""" Creates the FPN layers on top of the backbone features.
Args
- C3 : Feature stage C3 from the backbone.
- C4 : Feature stage C4 from the backbone.
- C5 : Feature stage C5 from the backbone.
+ backbone_layers: a dictionary containing feature stages C3, C4, C5 from the backbone. Also contains C2 if provided.
+ pyramid_levels: Pyramid levels in use.
feature_size : The feature size to use for the resulting feature levels.
Returns
- A list of feature levels [P3, P4, P5, P6, P7].
+ output_layers : A dict of feature levels. P3, P4, P5 are always included. P2, P6, P7 are included if in use.
"""
+
+ output_layers = {}
+
# upsample C5 to get P5 from the FPN paper
- P5 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C5_reduced')(C5)
- P5_upsampled = layers.UpsampleLike(name='P5_upsampled')([P5, C4])
+ P5 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C5_reduced')(backbone_layers['C5'])
+ P5_upsampled = layers.UpsampleLike(name='P5_upsampled')([P5, backbone_layers['C4']])
P5 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P5')(P5)
+ output_layers["P5"] = P5
# add P5 elementwise to C4
- P4 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C4_reduced')(C4)
+ P4 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C4_reduced')(backbone_layers['C4'])
P4 = keras.layers.Add(name='P4_merged')([P5_upsampled, P4])
- P4_upsampled = layers.UpsampleLike(name='P4_upsampled')([P4, C3])
+ P4_upsampled = layers.UpsampleLike(name='P4_upsampled')([P4, backbone_layers['C3']])
P4 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P4')(P4)
+ output_layers["P4"] = P4
# add P4 elementwise to C3
- P3 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C3_reduced')(C3)
+ P3 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C3_reduced')(backbone_layers['C3'])
P3 = keras.layers.Add(name='P3_merged')([P4_upsampled, P3])
+ if 'C2' in backbone_layers and 2 in pyramid_levels:
+ P3_upsampled = layers.UpsampleLike(name='P3_upsampled')([P3, backbone_layers['C2']])
P3 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P3')(P3)
+ output_layers["P3"] = P3
+
+ if 'C2' in backbone_layers and 2 in pyramid_levels:
+ P2 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C2_reduced')(backbone_layers['C2'])
+ P2 = keras.layers.Add(name='P2_merged')([P3_upsampled, P2])
+ P2 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P2')(P2)
+ output_layers["P2"] = P2
# "P6 is obtained via a 3x3 stride-2 conv on C5"
- P6 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P6')(C5)
+ if 6 in pyramid_levels:
+ P6 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P6')(backbone_layers['C5'])
+ output_layers["P6"] = P6
# "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
- P7 = keras.layers.Activation('relu', name='C6_relu')(P6)
- P7 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P7')(P7)
+ if 7 in pyramid_levels:
+ if 6 not in pyramid_levels:
+ raise ValueError("P6 is required to use P7")
+ P7 = keras.layers.Activation('relu', name='C6_relu')(P6)
+ P7 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P7')(P7)
+ output_layers["P7"] = P7
- return [P3, P4, P5, P6, P7]
-
-
-class AnchorParameters:
- """ The parameteres that define how anchors are generated.
-
- Args
- sizes : List of sizes to use. Each size corresponds to one feature level.
- strides : List of strides to use. Each stride correspond to one feature level.
- ratios : List of ratios to use per location in a feature map.
- scales : List of scales to use per location in a feature map.
- """
- def __init__(self, sizes, strides, ratios, scales):
- self.sizes = sizes
- self.strides = strides
- self.ratios = ratios
- self.scales = scales
-
- def num_anchors(self):
- return len(self.ratios) * len(self.scales)
-
-"""
-The default anchor parameters.
-"""
-AnchorParameters.default = AnchorParameters(
- sizes = [32, 64, 128, 256, 512],
- strides = [8, 16, 32, 64, 128],
- ratios = np.array([0.5, 1, 2], keras.backend.floatx()),
- scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)], keras.backend.floatx()),
-)
+ return output_layers
-def default_submodels(num_classes, anchor_parameters):
+def default_submodels(num_classes, num_anchors):
""" Create a list of default submodels used for object detection.
The default submodels contains a regression submodel and a classification submodel.
Args
- num_classes : Number of classes to use.
- anchor_parameters : Struct that defines how the anchors should be made.
+ num_classes : Number of classes to use.
+ num_anchors : Number of base anchors.
Returns
A list of tuple, where the first element is the name of the submodel and the second element is the submodel itself.
"""
return [
- ('regression', default_regression_model(anchor_parameters.num_anchors())),
- ('classification', default_classification_model(num_classes, anchor_parameters.num_anchors()))
+ ('regression', default_regression_model(4, num_anchors)),
+ ('classification', default_classification_model(num_classes, num_anchors))
]
@@ -230,7 +217,7 @@ def __build_pyramid(models, features):
""" Applies all submodels to each FPN level.
Args
- models : List of sumodels to run on each pyramid level (by default only regression, classifcation).
+ models : List of submodels to run on each pyramid level (by default only regression, classification).
features : The FPN features.
Returns
@@ -269,10 +256,11 @@ def __build_anchors(anchor_parameters, features):
def retinanet(
inputs,
- backbone,
+ backbone_layers,
num_classes,
- anchor_parameters = AnchorParameters.default,
+ num_anchors = None,
create_pyramid_features = __create_pyramid_features,
+ pyramid_levels = None,
submodels = None,
name = 'retinanet'
):
@@ -283,88 +271,133 @@ def retinanet(
Args
inputs : keras.layers.Input (or list of) for the input to the model.
num_classes : Number of classes to classify.
- anchor_parameters : Struct containing configuration for anchor generation (sizes, strides, ratios, scales).
- create_pyramid_features : Functor for creating pyramid features given the features C3, C4, C5 from the backbone.
+ num_anchors : Number of base anchors.
+ create_pyramid_features : Functor for creating pyramid features given the features C3, C4, C5, and possibly C2 from the backbone.
+ pyramid_levels : pyramid levels to use.
submodels : Submodels to run on each feature map (default is regression and classification submodels).
name : Name of the model.
Returns
A keras.models.Model which takes an image as input and outputs generated anchors and the result from each submodel on every pyramid level.
- The order of the outputs is as defined in submodels. Using default values the output is:
+ The order of the outputs is as defined in submodels:
```
[
- anchors, regression, classification
+ regression, classification, other[0], other[1], ...
]
```
"""
+
+ if num_anchors is None:
+ num_anchors = AnchorParameters.default.num_anchors()
+
if submodels is None:
- submodels = default_submodels(num_classes, anchor_parameters)
+ submodels = default_submodels(num_classes, num_anchors)
+
+ if pyramid_levels is None:
+ pyramid_levels = [3, 4, 5, 6, 7]
- _, C3, C4, C5 = backbone.outputs # we ignore C2
+ if 2 in pyramid_levels and 'C2' not in backbone_layers:
+ raise ValueError("C2 not provided by backbone model. Cannot create P2 layers.")
+
+ if 3 not in pyramid_levels or 4 not in pyramid_levels or 5 not in pyramid_levels:
+ raise ValueError("pyramid levels 3, 4, and 5 required for functionality")
# compute pyramid features as per https://arxiv.org/abs/1708.02002
- features = create_pyramid_features(C3, C4, C5)
+ features = create_pyramid_features(backbone_layers, pyramid_levels)
+ feature_list = [features['P{}'.format(p)] for p in pyramid_levels]
# for all pyramid levels, run available submodels
- pyramids = __build_pyramid(submodels, features)
- anchors = __build_anchors(anchor_parameters, features)
+ pyramids = __build_pyramid(submodels, feature_list)
- # concatenate outputs to one list
- outputs = [anchors] + pyramids
-
- return keras.models.Model(inputs=inputs, outputs=outputs, name=name)
+ return keras.models.Model(inputs=inputs, outputs=pyramids, name=name)
def retinanet_bbox(
- inputs,
- num_classes,
- nms = True,
- name = 'retinanet-bbox',
+ model = None,
+ nms = True,
+ class_specific_filter = True,
+ name = 'retinanet-bbox',
+ anchor_params = None,
+ pyramid_levels = None,
+ nms_threshold = 0.5,
+ score_threshold = 0.05,
+ max_detections = 300,
+ parallel_iterations = 32,
**kwargs
):
- """ Construct a RetinaNet model on top of a backbone and adds convenience functions to output detections directly.
+ """ Construct a RetinaNet model on top of a backbone and adds convenience functions to output boxes directly.
- This model uses the minimum retinanet model and appends a few layers to compute detections within the graph.
+ This model uses the minimum retinanet model and appends a few layers to compute boxes within the graph.
These layers include applying the regression values to the anchors and performing NMS.
Args
- inputs : keras.layers.Input (or list of) for the input to the model.
- num_classes : Number of classes to classify.
- name : Name of the model.
- *kwargs : Additional kwargs to pass to the minimal retinanet model.
+ model : RetinaNet model to append bbox layers to. If None, it will create a RetinaNet model using **kwargs.
+ nms : Whether to use non-maximum suppression for the filtering step.
+ class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only.
+ name : Name of the model.
+ anchor_params : Struct containing anchor parameters. If None, default values are used.
+ pyramid_levels : pyramid levels to use.
+ nms_threshold : Threshold for the IoU value to determine when a box should be suppressed.
+ score_threshold : Threshold used to prefilter the boxes with.
+ max_detections : Maximum number of detections to keep.
+ parallel_iterations : Number of batch items to process in parallel.
+ **kwargs : Additional kwargs to pass to the minimal retinanet model.
Returns
- A keras.models.Model which takes an image as input and outputs the result from each submodel on every pyramid level and a list of detections.
+ A keras.models.Model which takes an image as input and outputs the detections on the image.
- The order is as defined in submodels. Using default values the output is:
+ The order is defined as follows:
```
[
- regression, classification, detections
+ boxes, scores, labels, other[0], other[1], ...
]
```
"""
- model = retinanet(inputs=inputs, num_classes=num_classes, **kwargs)
+
+ # if no anchor parameters are passed, use default values
+ if anchor_params is None:
+ anchor_params = AnchorParameters.default
+
+ # create RetinaNet model
+ if model is None:
+ model = retinanet(num_anchors=anchor_params.num_anchors(), **kwargs)
+ else:
+ assert_training_model(model)
+
+ if pyramid_levels is None:
+ pyramid_levels = [3, 4, 5, 6, 7]
+
+ assert len(pyramid_levels) == len(anchor_params.sizes), \
+ "number of pyramid levels {} should match number of anchor parameter sizes {}".format(len(pyramid_levels),
+ len(anchor_params.sizes))
+
+ pyramid_layer_names = ['P{}'.format(p) for p in pyramid_levels]
+ # compute the anchors
+ features = [model.get_layer(p_name).output for p_name in pyramid_layer_names]
+ anchors = __build_anchors(anchor_params, features)
# we expect the anchors, regression and classification values as first output
- anchors = model.outputs[0]
- regression = model.outputs[1]
- classification = model.outputs[2]
+ regression = model.outputs[0]
+ classification = model.outputs[1]
# "other" can be any additional output from custom submodels, by default this will be []
- other = model.outputs[3:]
+ other = model.outputs[2:]
# apply predicted regression to anchors
boxes = layers.RegressBoxes(name='boxes')([anchors, regression])
-
- # additionally apply non maximum suppression
- if nms:
- detections = layers.NonMaximumSuppression(name='nms')([boxes, classification] + other)
- else:
- detections = keras.layers.Concatenate(axis=2, name='detections')([boxes, classification] + other)
-
- # construct list of outputs
- outputs = [regression, classification] + other + [detections]
+ boxes = layers.ClipBoxes(name='clipped_boxes')([model.inputs[0], boxes])
+
+ # filter detections (apply NMS / score threshold / select top-k)
+ detections = layers.FilterDetections(
+ nms = nms,
+ class_specific_filter = class_specific_filter,
+ name = 'filtered_detections',
+ nms_threshold = nms_threshold,
+ score_threshold = score_threshold,
+ max_detections = max_detections,
+ parallel_iterations = parallel_iterations
+ )([boxes, classification] + other)
# construct the model
- return keras.models.Model(inputs=inputs, outputs=outputs, name=name)
+ return keras.models.Model(inputs=model.inputs, outputs=detections, name=name)
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/models/senet.py b/imageai_tf_deprecated/Detection/keras_retinanet/models/senet.py
new file mode 100644
index 00000000..a4cc4126
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/models/senet.py
@@ -0,0 +1,161 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from tensorflow import keras
+
+from . import retinanet
+from . import Backbone
+from classification_models.keras import Classifiers
+
+
+class SeBackbone(Backbone):
+ """ Describes backbone information and provides utility functions.
+ """
+
+ def __init__(self, backbone):
+ super(SeBackbone, self).__init__(backbone)
+ _, self.preprocess_image_func = Classifiers.get(self.backbone)
+
+ def retinanet(self, *args, **kwargs):
+ """ Returns a retinanet model using the correct backbone.
+ """
+ return senet_retinanet(*args, backbone=self.backbone, **kwargs)
+
+ def download_imagenet(self):
+ """ Downloads ImageNet weights and returns path to weights file.
+ """
+ from classification_models.weights import WEIGHTS_COLLECTION
+
+ weights_path = None
+ for el in WEIGHTS_COLLECTION:
+ if el['model'] == self.backbone and not el['include_top']:
+ weights_path = keras.utils.get_file(el['name'], el['url'], cache_subdir='models', file_hash=el['md5'])
+
+ if weights_path is None:
+ raise ValueError('Unable to find imagenet weights for backbone {}!'.format(self.backbone))
+
+ return weights_path
+
+ def validate(self):
+ """ Checks whether the backbone string is correct.
+ """
+ allowed_backbones = ['seresnet18', 'seresnet34', 'seresnet50', 'seresnet101', 'seresnet152',
+ 'seresnext50', 'seresnext101', 'senet154']
+ backbone = self.backbone.split('_')[0]
+
+ if backbone not in allowed_backbones:
+ raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones))
+
+ def preprocess_image(self, inputs):
+ """ Takes as input an image and prepares it for being passed through the network.
+ """
+ return self.preprocess_image_func(inputs)
+
+
+def senet_retinanet(num_classes, backbone='seresnext50', inputs=None, modifier=None, **kwargs):
+ """ Constructs a retinanet model using a senet backbone.
+
+ Args
+ num_classes: Number of classes to predict.
+ backbone: Which backbone to use (one of ('seresnet18', 'seresnet34', 'seresnet50', 'seresnet101', 'seresnet152', 'seresnext50', 'seresnext101', 'senet154')).
+ inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
+ modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
+
+ Returns
+ RetinaNet model with a SE-Net backbone.
+ """
+ # choose default input
+ if inputs is None:
+ if keras.backend.image_data_format() == 'channels_first':
+ inputs = keras.layers.Input(shape=(3, None, None))
+ else:
+ # inputs = keras.layers.Input(shape=(224, 224, 3))
+ inputs = keras.layers.Input(shape=(None, None, 3))
+
+ classifier, _ = Classifiers.get(backbone)
+ model = classifier(input_tensor=inputs, include_top=False, weights=None)
+
+ # get last conv layer from the end of each block [28x28, 14x14, 7x7]
+ if backbone == 'seresnet18' or backbone == 'seresnet34':
+ layer_outputs = ['stage3_unit1_relu1', 'stage4_unit1_relu1', 'relu1']
+ elif backbone == 'seresnet50':
+ layer_outputs = ['activation_36', 'activation_66', 'activation_81']
+ elif backbone == 'seresnet101':
+ layer_outputs = ['activation_36', 'activation_151', 'activation_166']
+ elif backbone == 'seresnet152':
+ layer_outputs = ['activation_56', 'activation_236', 'activation_251']
+ elif backbone == 'seresnext50':
+ layer_outputs = ['activation_37', 'activation_67', 'activation_81']
+ elif backbone == 'seresnext101':
+ layer_outputs = ['activation_37', 'activation_152', 'activation_166']
+ elif backbone == 'senet154':
+ layer_outputs = ['activation_59', 'activation_239', 'activation_253']
+ else:
+ raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))
+
+ layer_outputs = [
+ model.get_layer(name=layer_outputs[0]).output, # 28x28
+ model.get_layer(name=layer_outputs[1]).output, # 14x14
+ model.get_layer(name=layer_outputs[2]).output, # 7x7
+ ]
+ # create the senet backbone
+ model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)
+
+ # invoke modifier if given
+ if modifier:
+ model = modifier(model)
+
+ # C2 not provided
+ backbone_layers = {
+ 'C3': model.outputs[0],
+ 'C4': model.outputs[1],
+ 'C5': model.outputs[2]
+ }
+
+ # create the full model
+ return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
+
+
+def seresnet18_retinanet(num_classes, inputs=None, **kwargs):
+ return senet_retinanet(num_classes=num_classes, backbone='seresnet18', inputs=inputs, **kwargs)
+
+
+def seresnet34_retinanet(num_classes, inputs=None, **kwargs):
+ return senet_retinanet(num_classes=num_classes, backbone='seresnet34', inputs=inputs, **kwargs)
+
+
+def seresnet50_retinanet(num_classes, inputs=None, **kwargs):
+ return senet_retinanet(num_classes=num_classes, backbone='seresnet50', inputs=inputs, **kwargs)
+
+
+def seresnet101_retinanet(num_classes, inputs=None, **kwargs):
+ return senet_retinanet(num_classes=num_classes, backbone='seresnet101', inputs=inputs, **kwargs)
+
+
+def seresnet152_retinanet(num_classes, inputs=None, **kwargs):
+ return senet_retinanet(num_classes=num_classes, backbone='seresnet152', inputs=inputs, **kwargs)
+
+
+def seresnext50_retinanet(num_classes, inputs=None, **kwargs):
+ return senet_retinanet(num_classes=num_classes, backbone='seresnext50', inputs=inputs, **kwargs)
+
+
+def seresnext101_retinanet(num_classes, inputs=None, **kwargs):
+ return senet_retinanet(num_classes=num_classes, backbone='seresnext101', inputs=inputs, **kwargs)
+
+
+def senet154_retinanet(num_classes, inputs=None, **kwargs):
+ return senet_retinanet(num_classes=num_classes, backbone='senet154', inputs=inputs, **kwargs)
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/models/vgg.py b/imageai_tf_deprecated/Detection/keras_retinanet/models/vgg.py
new file mode 100644
index 00000000..011c30dc
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/models/vgg.py
@@ -0,0 +1,106 @@
+"""
+Copyright 2017-2018 cgratie (https://github.com/cgratie/)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+from tensorflow import keras
+
+from . import retinanet
+from . import Backbone
+from ..utils.image import preprocess_image
+
+
+class VGGBackbone(Backbone):
+ """ Describes backbone information and provides utility functions.
+ """
+
+ def retinanet(self, *args, **kwargs):
+ """ Returns a retinanet model using the correct backbone.
+ """
+ return vgg_retinanet(*args, backbone=self.backbone, **kwargs)
+
+ def download_imagenet(self):
+ """ Downloads ImageNet weights and returns path to weights file.
+ Weights can be downloaded at https://github.com/fizyr/keras-models/releases .
+ """
+ if self.backbone == 'vgg16':
+ resource = keras.applications.vgg16.vgg16.WEIGHTS_PATH_NO_TOP
+ checksum = '6d6bbae143d832006294945121d1f1fc'
+ elif self.backbone == 'vgg19':
+ resource = keras.applications.vgg19.vgg19.WEIGHTS_PATH_NO_TOP
+ checksum = '253f8cb515780f3b799900260a226db6'
+ else:
+ raise ValueError("Backbone '{}' not recognized.".format(self.backbone))
+
+ return keras.utils.get_file(
+ '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(self.backbone),
+ resource,
+ cache_subdir='models',
+ file_hash=checksum
+ )
+
+ def validate(self):
+ """ Checks whether the backbone string is correct.
+ """
+ allowed_backbones = ['vgg16', 'vgg19']
+
+ if self.backbone not in allowed_backbones:
+ raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(self.backbone, allowed_backbones))
+
+ def preprocess_image(self, inputs):
+ """ Takes as input an image and prepares it for being passed through the network.
+ """
+ return preprocess_image(inputs, mode='caffe')
+
+
+def vgg_retinanet(num_classes, backbone='vgg16', inputs=None, modifier=None, **kwargs):
+ """ Constructs a retinanet model using a vgg backbone.
+
+ Args
+ num_classes: Number of classes to predict.
+ backbone: Which backbone to use (one of ('vgg16', 'vgg19')).
+ inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
+ modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
+
+ Returns
+ RetinaNet model with a VGG backbone.
+ """
+ # choose default input
+ if inputs is None:
+ inputs = keras.layers.Input(shape=(None, None, 3))
+
+ # create the vgg backbone
+ if backbone == 'vgg16':
+ vgg = keras.applications.VGG16(input_tensor=inputs, include_top=False, weights=None)
+ elif backbone == 'vgg19':
+ vgg = keras.applications.VGG19(input_tensor=inputs, include_top=False, weights=None)
+ else:
+ raise ValueError("Backbone '{}' not recognized.".format(backbone))
+
+ if modifier:
+ vgg = modifier(vgg)
+
+ # create the full model
+ layer_names = ["block3_pool", "block4_pool", "block5_pool"]
+ layer_outputs = [vgg.get_layer(name).output for name in layer_names]
+
+ # C2 not provided
+ backbone_layers = {
+ 'C3': layer_outputs[0],
+ 'C4': layer_outputs[1],
+ 'C5': layer_outputs[2]
+ }
+
+ return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
diff --git a/imageai/Prediction/SqueezeNet/__init__.py b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/__init__.py
similarity index 100%
rename from imageai/Prediction/SqueezeNet/__init__.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/__init__.py
diff --git a/imageai/Detection/keras_retinanet/preprocessing/coco.py b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/coco.py
similarity index 62%
rename from imageai/Detection/keras_retinanet/preprocessing/coco.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/coco.py
index fc063710..b684b809 100644
--- a/imageai/Detection/keras_retinanet/preprocessing/coco.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/coco.py
@@ -24,7 +24,18 @@
class CocoGenerator(Generator):
+ """ Generate data from the COCO dataset.
+
+ See https://github.com/cocodataset/cocoapi/tree/master/PythonAPI for more information.
+ """
+
def __init__(self, data_dir, set_name, **kwargs):
+ """ Initialize a COCO data generator.
+
+ Args
+ data_dir: Path to where the COCO dataset is stored.
+ set_name: Name of the set to parse.
+ """
self.data_dir = data_dir
self.set_name = set_name
self.coco = COCO(os.path.join(data_dir, 'annotations', 'instances_' + set_name + '.json'))
@@ -35,6 +46,8 @@ def __init__(self, data_dir, set_name, **kwargs):
super(CocoGenerator, self).__init__(**kwargs)
def load_classes(self):
+ """ Loads the class to label mapping (and inverse) for COCO.
+ """
# load class names (name -> label)
categories = self.coco.loadCats(self.coco.getCatIds())
categories.sort(key=lambda x: x['id'])
@@ -53,39 +66,76 @@ def load_classes(self):
self.labels[value] = key
def size(self):
+ """ Size of the COCO dataset.
+ """
return len(self.image_ids)
def num_classes(self):
+ """ Number of classes in the dataset. For COCO this is 80.
+ """
return len(self.classes)
+ def has_label(self, label):
+ """ Return True if label is a known label.
+ """
+ return label in self.labels
+
+ def has_name(self, name):
+ """ Returns True if name is a known class.
+ """
+ return name in self.classes
+
def name_to_label(self, name):
+ """ Map name to label.
+ """
return self.classes[name]
def label_to_name(self, label):
+ """ Map label to name.
+ """
return self.labels[label]
def coco_label_to_label(self, coco_label):
+ """ Map COCO label to the label as used in the network.
+ COCO has some gaps in the order of labels. The highest label is 90, but there are 80 classes.
+ """
return self.coco_labels_inverse[coco_label]
def coco_label_to_name(self, coco_label):
+ """ Map COCO label to name.
+ """
return self.label_to_name(self.coco_label_to_label(coco_label))
def label_to_coco_label(self, label):
+ """ Map label as used by the network to labels as used by COCO.
+ """
return self.coco_labels[label]
+ def image_path(self, image_index):
+ """ Returns the image path for image_index.
+ """
+ image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
+ path = os.path.join(self.data_dir, 'images', self.set_name, image_info['file_name'])
+ return path
+
def image_aspect_ratio(self, image_index):
+ """ Compute the aspect ratio for an image with image_index.
+ """
image = self.coco.loadImgs(self.image_ids[image_index])[0]
return float(image['width']) / float(image['height'])
def load_image(self, image_index):
- image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
- path = os.path.join(self.data_dir, 'images', self.set_name, image_info['file_name'])
+ """ Load an image at the image_index.
+ """
+ path = self.image_path(image_index)
return read_image_bgr(path)
def load_annotations(self, image_index):
+ """ Load annotations for an image_index.
+ """
# get ground truth annotations
annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
- annotations = np.zeros((0, 5))
+ annotations = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))}
# some images appear to miss annotations (like image with id 257034)
if len(annotations_ids) == 0:
@@ -98,13 +148,12 @@ def load_annotations(self, image_index):
if a['bbox'][2] < 1 or a['bbox'][3] < 1:
continue
- annotation = np.zeros((1, 5))
- annotation[0, :4] = a['bbox']
- annotation[0, 4] = self.coco_label_to_label(a['category_id'])
- annotations = np.append(annotations, annotation, axis=0)
-
- # transform from [x, y, w, h] to [x1, y1, x2, y2]
- annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
- annotations[:, 3] = annotations[:, 1] + annotations[:, 3]
+ annotations['labels'] = np.concatenate([annotations['labels'], [self.coco_label_to_label(a['category_id'])]], axis=0)
+ annotations['bboxes'] = np.concatenate([annotations['bboxes'], [[
+ a['bbox'][0],
+ a['bbox'][1],
+ a['bbox'][0] + a['bbox'][2],
+ a['bbox'][1] + a['bbox'][3],
+ ]]], axis=0)
return annotations
diff --git a/imageai/Detection/keras_retinanet/preprocessing/csv_generator.py b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/csv_generator.py
similarity index 72%
rename from imageai/Detection/keras_retinanet/preprocessing/csv_generator.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/csv_generator.py
index a82c54f7..c756224e 100644
--- a/imageai/Detection/keras_retinanet/preprocessing/csv_generator.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/csv_generator.py
@@ -25,6 +25,7 @@
import csv
import sys
import os.path
+from collections import OrderedDict
def _parse(value, function, fmt):
@@ -42,8 +43,12 @@ def _parse(value, function, fmt):
def _read_classes(csv_reader):
- result = {}
+ """ Parse the classes file given by csv_reader.
+ """
+ result = OrderedDict()
for line, row in enumerate(csv_reader):
+ line += 1
+
try:
class_name, class_id = row
except ValueError:
@@ -57,10 +62,14 @@ def _read_classes(csv_reader):
def _read_annotations(csv_reader, classes):
- result = {}
+ """ Read annotations from the csv_reader.
+ """
+ result = OrderedDict()
for line, row in enumerate(csv_reader):
+ line += 1
+
try:
- img_file, x1, y1, x2, y2, class_name = row
+ img_file, x1, y1, x2, y2, class_name = row[:6]
except ValueError:
raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None)
@@ -91,8 +100,7 @@ def _read_annotations(csv_reader, classes):
def _open_for_csv(path):
- """
- Open a file with flags suitable for csv.reader.
+ """ Open a file with flags suitable for csv.reader.
This is different for python2 it means with mode 'rb',
for python3 this means 'r' with "universal newlines".
@@ -104,6 +112,11 @@ def _open_for_csv(path):
class CSVGenerator(Generator):
+ """ Generate data for a custom CSV dataset.
+
+ See https://github.com/fizyr/keras-retinanet#csv-datasets for more information.
+ """
+
def __init__(
self,
csv_data_file,
@@ -111,6 +124,13 @@ def __init__(
base_dir=None,
**kwargs
):
+ """ Initialize a CSV data generator.
+
+ Args
+ csv_data_file: Path to the CSV annotations file.
+ csv_class_file: Path to the CSV classes file.
+ base_dir: Directory w.r.t. where the files are to be searched (defaults to the directory containing the csv_data_file).
+ """
self.image_names = []
self.image_data = {}
self.base_dir = base_dir
@@ -141,39 +161,65 @@ def __init__(
super(CSVGenerator, self).__init__(**kwargs)
def size(self):
+ """ Size of the dataset.
+ """
return len(self.image_names)
def num_classes(self):
+ """ Number of classes in the dataset.
+ """
return max(self.classes.values()) + 1
+ def has_label(self, label):
+ """ Return True if label is a known label.
+ """
+ return label in self.labels
+
+ def has_name(self, name):
+ """ Returns True if name is a known class.
+ """
+ return name in self.classes
+
def name_to_label(self, name):
+ """ Map name to label.
+ """
return self.classes[name]
def label_to_name(self, label):
+ """ Map label to name.
+ """
return self.labels[label]
def image_path(self, image_index):
+ """ Returns the image path for image_index.
+ """
return os.path.join(self.base_dir, self.image_names[image_index])
def image_aspect_ratio(self, image_index):
+ """ Compute the aspect ratio for an image with image_index.
+ """
# PIL is fast for metadata
image = Image.open(self.image_path(image_index))
return float(image.width) / float(image.height)
def load_image(self, image_index):
+ """ Load an image at the image_index.
+ """
return read_image_bgr(self.image_path(image_index))
def load_annotations(self, image_index):
- path = self.image_names[image_index]
- annots = self.image_data[path]
- boxes = np.zeros((len(annots), 5))
-
- for idx, annot in enumerate(annots):
- class_name = annot['class']
- boxes[idx, 0] = float(annot['x1'])
- boxes[idx, 1] = float(annot['y1'])
- boxes[idx, 2] = float(annot['x2'])
- boxes[idx, 3] = float(annot['y2'])
- boxes[idx, 4] = self.name_to_label(class_name)
-
- return boxes
+ """ Load annotations for an image_index.
+ """
+ path = self.image_names[image_index]
+ annotations = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))}
+
+ for idx, annot in enumerate(self.image_data[path]):
+ annotations['labels'] = np.concatenate((annotations['labels'], [self.name_to_label(annot['class'])]))
+ annotations['bboxes'] = np.concatenate((annotations['bboxes'], [[
+ float(annot['x1']),
+ float(annot['y1']),
+ float(annot['x2']),
+ float(annot['y2']),
+ ]]))
+
+ return annotations
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/generator.py b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/generator.py
new file mode 100644
index 00000000..7c3bb0a6
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/generator.py
@@ -0,0 +1,381 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+import random
+import warnings
+
+from tensorflow import keras
+
+from ..utils.anchors import (
+ anchor_targets_bbox,
+ anchors_for_shape,
+ guess_shapes
+)
+from ..utils.config import parse_anchor_parameters, parse_pyramid_levels
+from ..utils.image import (
+ TransformParameters,
+ adjust_transform_for_image,
+ apply_transform,
+ preprocess_image,
+ resize_image,
+)
+from ..utils.transform import transform_aabb
+
+
+class Generator(keras.utils.Sequence):
+ """ Abstract generator class.
+ """
+
+ def __init__(
+ self,
+ transform_generator = None,
+ visual_effect_generator=None,
+ batch_size=1,
+ group_method='ratio', # one of 'none', 'random', 'ratio'
+ shuffle_groups=True,
+ image_min_side=800,
+ image_max_side=1333,
+ no_resize=False,
+ transform_parameters=None,
+ compute_anchor_targets=anchor_targets_bbox,
+ compute_shapes=guess_shapes,
+ preprocess_image=preprocess_image,
+ config=None
+ ):
+ """ Initialize Generator object.
+
+ Args
+ transform_generator : A generator used to randomly transform images and annotations.
+ batch_size : The size of the batches to generate.
+ group_method : Determines how images are grouped together (defaults to 'ratio', one of ('none', 'random', 'ratio')).
+ shuffle_groups : If True, shuffles the groups each epoch.
+ image_min_side : After resizing the minimum side of an image is equal to image_min_side.
+ image_max_side : If after resizing the maximum side is larger than image_max_side, scales down further so that the max side is equal to image_max_side.
+ no_resize : If True, no image/annotation resizing is performed.
+ transform_parameters : The transform parameters used for data augmentation.
+ compute_anchor_targets : Function handler for computing the targets of anchors for an image and its annotations.
+ compute_shapes : Function handler for computing the shapes of the pyramid for a given input.
+ preprocess_image : Function handler for preprocessing an image (scaling / normalizing) for passing through a network.
+ """
+ self.transform_generator = transform_generator
+ self.visual_effect_generator = visual_effect_generator
+ self.batch_size = int(batch_size)
+ self.group_method = group_method
+ self.shuffle_groups = shuffle_groups
+ self.image_min_side = image_min_side
+ self.image_max_side = image_max_side
+ self.no_resize = no_resize
+ self.transform_parameters = transform_parameters or TransformParameters()
+ self.compute_anchor_targets = compute_anchor_targets
+ self.compute_shapes = compute_shapes
+ self.preprocess_image = preprocess_image
+ self.config = config
+
+ # Define groups
+ self.group_images()
+
+ # Shuffle when initializing
+ if self.shuffle_groups:
+ self.on_epoch_end()
+
+ def on_epoch_end(self):
+ if self.shuffle_groups:
+ random.shuffle(self.groups)
+
+ def size(self):
+ """ Size of the dataset.
+ """
+ raise NotImplementedError('size method not implemented')
+
+ def num_classes(self):
+ """ Number of classes in the dataset.
+ """
+ raise NotImplementedError('num_classes method not implemented')
+
+ def has_label(self, label):
+ """ Returns True if label is a known label.
+ """
+ raise NotImplementedError('has_label method not implemented')
+
+ def has_name(self, name):
+ """ Returns True if name is a known class.
+ """
+ raise NotImplementedError('has_name method not implemented')
+
+ def name_to_label(self, name):
+ """ Map name to label.
+ """
+ raise NotImplementedError('name_to_label method not implemented')
+
+ def label_to_name(self, label):
+ """ Map label to name.
+ """
+ raise NotImplementedError('label_to_name method not implemented')
+
+ def image_aspect_ratio(self, image_index):
+ """ Compute the aspect ratio for an image with image_index.
+ """
+ raise NotImplementedError('image_aspect_ratio method not implemented')
+
+ def image_path(self, image_index):
+ """ Get the path to an image.
+ """
+ raise NotImplementedError('image_path method not implemented')
+
+ def load_image(self, image_index):
+ """ Load an image at the image_index.
+ """
+ raise NotImplementedError('load_image method not implemented')
+
+ def load_annotations(self, image_index):
+ """ Load annotations for an image_index.
+ """
+ raise NotImplementedError('load_annotations method not implemented')
+
+ def load_annotations_group(self, group):
+ """ Load annotations for all images in group.
+ """
+ annotations_group = [self.load_annotations(image_index) for image_index in group]
+ for annotations in annotations_group:
+ assert(isinstance(annotations, dict)), '\'load_annotations\' should return a list of dictionaries, received: {}'.format(type(annotations))
+ assert('labels' in annotations), '\'load_annotations\' should return a list of dictionaries that contain \'labels\' and \'bboxes\'.'
+ assert('bboxes' in annotations), '\'load_annotations\' should return a list of dictionaries that contain \'labels\' and \'bboxes\'.'
+
+ return annotations_group
+
+ def filter_annotations(self, image_group, annotations_group, group):
+ """ Filter annotations by removing those that are outside of the image bounds or whose width/height < 0.
+ """
+ # test all annotations
+ for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
+ # test x2 < x1 | y2 < y1 | x1 < 0 | y1 < 0 | x2 <= 0 | y2 <= 0 | x2 >= image.shape[1] | y2 >= image.shape[0]
+ invalid_indices = np.where(
+ (annotations['bboxes'][:, 2] <= annotations['bboxes'][:, 0]) |
+ (annotations['bboxes'][:, 3] <= annotations['bboxes'][:, 1]) |
+ (annotations['bboxes'][:, 0] < 0) |
+ (annotations['bboxes'][:, 1] < 0) |
+ (annotations['bboxes'][:, 2] > image.shape[1]) |
+ (annotations['bboxes'][:, 3] > image.shape[0])
+ )[0]
+
+ # delete invalid indices
+ if len(invalid_indices):
+ warnings.warn('Image {} with id {} (shape {}) contains the following invalid boxes: {}.'.format(
+ self.image_path(group[index]),
+ group[index],
+ image.shape,
+ annotations['bboxes'][invalid_indices, :]
+ ))
+ for k in annotations_group[index].keys():
+ annotations_group[index][k] = np.delete(annotations[k], invalid_indices, axis=0)
+ return image_group, annotations_group
+
+ def load_image_group(self, group):
+ """ Load images for all images in a group.
+ """
+ return [self.load_image(image_index) for image_index in group]
+
+ def random_visual_effect_group_entry(self, image, annotations):
+ """ Randomly transforms image and annotation.
+ """
+ visual_effect = next(self.visual_effect_generator)
+ # apply visual effect
+ image = visual_effect(image)
+ return image, annotations
+
+ def random_visual_effect_group(self, image_group, annotations_group):
+ """ Randomly apply visual effect on each image.
+ """
+ assert(len(image_group) == len(annotations_group))
+
+ if self.visual_effect_generator is None:
+ # do nothing
+ return image_group, annotations_group
+
+ for index in range(len(image_group)):
+ # apply effect on a single group entry
+ image_group[index], annotations_group[index] = self.random_visual_effect_group_entry(
+ image_group[index], annotations_group[index]
+ )
+
+ return image_group, annotations_group
+
+ def random_transform_group_entry(self, image, annotations, transform=None):
+ """ Randomly transforms image and annotation.
+ """
+ # randomly transform both image and annotations
+ if transform is not None or self.transform_generator:
+ if transform is None:
+ transform = adjust_transform_for_image(next(self.transform_generator), image, self.transform_parameters.relative_translation)
+
+ # apply transformation to image
+ image = apply_transform(transform, image, self.transform_parameters)
+
+ # Transform the bounding boxes in the annotations.
+ annotations['bboxes'] = annotations['bboxes'].copy()
+ for index in range(annotations['bboxes'].shape[0]):
+ annotations['bboxes'][index, :] = transform_aabb(transform, annotations['bboxes'][index, :])
+
+ return image, annotations
+
+ def random_transform_group(self, image_group, annotations_group):
+ """ Randomly transforms each image and its annotations.
+ """
+
+ assert(len(image_group) == len(annotations_group))
+
+ for index in range(len(image_group)):
+ # transform a single group entry
+ image_group[index], annotations_group[index] = self.random_transform_group_entry(image_group[index], annotations_group[index])
+
+ return image_group, annotations_group
+
+ def resize_image(self, image):
+ """ Resize an image using image_min_side and image_max_side.
+ """
+ if self.no_resize:
+ return image, 1
+ else:
+ return resize_image(image, min_side=self.image_min_side, max_side=self.image_max_side)
+
+ def preprocess_group_entry(self, image, annotations):
+ """ Preprocess image and its annotations.
+ """
+ # resize image
+ image, image_scale = self.resize_image(image)
+
+ # preprocess the image
+ image = self.preprocess_image(image)
+
+ # apply resizing to annotations too
+ annotations['bboxes'] *= image_scale
+
+ # convert to the wanted keras floatx
+ image = keras.backend.cast_to_floatx(image)
+
+ return image, annotations
+
+ def preprocess_group(self, image_group, annotations_group):
+ """ Preprocess each image and its annotations in its group.
+ """
+ assert(len(image_group) == len(annotations_group))
+
+ for index in range(len(image_group)):
+ # preprocess a single group entry
+ image_group[index], annotations_group[index] = self.preprocess_group_entry(image_group[index], annotations_group[index])
+
+ return image_group, annotations_group
+
+ def group_images(self):
+ """ Order the images according to self.order and makes groups of self.batch_size.
+ """
+ # determine the order of the images
+ order = list(range(self.size()))
+ if self.group_method == 'random':
+ random.shuffle(order)
+ elif self.group_method == 'ratio':
+ order.sort(key=lambda x: self.image_aspect_ratio(x))
+
+ # divide into groups, one group = one batch
+ self.groups = [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in range(0, len(order), self.batch_size)]
+
+ def compute_inputs(self, image_group):
+ """ Compute inputs for the network using an image_group.
+ """
+ # get the max image shape
+ max_shape = tuple(max(image.shape[x] for image in image_group) for x in range(3))
+
+ # construct an image batch object
+ image_batch = np.zeros((self.batch_size,) + max_shape, dtype=keras.backend.floatx())
+
+ # copy all images to the upper left part of the image batch object
+ for image_index, image in enumerate(image_group):
+ image_batch[image_index, :image.shape[0], :image.shape[1], :image.shape[2]] = image
+
+ if keras.backend.image_data_format() == 'channels_first':
+ image_batch = image_batch.transpose((0, 3, 1, 2))
+
+ return image_batch
+
+ def generate_anchors(self, image_shape):
+ anchor_params = None
+ pyramid_levels = None
+ if self.config and 'anchor_parameters' in self.config:
+ anchor_params = parse_anchor_parameters(self.config)
+ if self.config and 'pyramid_levels' in self.config:
+ pyramid_levels = parse_pyramid_levels(self.config)
+
+ return anchors_for_shape(image_shape, anchor_params=anchor_params, pyramid_levels=pyramid_levels, shapes_callback=self.compute_shapes)
+
+ def compute_targets(self, image_group, annotations_group):
+ """ Compute target outputs for the network using images and their annotations.
+ """
+ # get the max image shape
+ max_shape = tuple(max(image.shape[x] for image in image_group) for x in range(3))
+ anchors = self.generate_anchors(max_shape)
+
+ batches = self.compute_anchor_targets(
+ anchors,
+ image_group,
+ annotations_group,
+ self.num_classes()
+ )
+
+ return list(batches)
+
+ def compute_input_output(self, group):
+ """ Compute inputs and target outputs for the network.
+ """
+ # load images and annotations
+ image_group = self.load_image_group(group)
+ annotations_group = self.load_annotations_group(group)
+
+ # check validity of annotations
+ image_group, annotations_group = self.filter_annotations(image_group, annotations_group, group)
+
+ # randomly apply visual effect
+ image_group, annotations_group = self.random_visual_effect_group(image_group, annotations_group)
+
+ # randomly transform data
+ image_group, annotations_group = self.random_transform_group(image_group, annotations_group)
+
+ # perform preprocessing steps
+ image_group, annotations_group = self.preprocess_group(image_group, annotations_group)
+
+ # compute network inputs
+ inputs = self.compute_inputs(image_group)
+
+ # compute network targets
+ targets = self.compute_targets(image_group, annotations_group)
+
+ return inputs, targets
+
+ def __len__(self):
+ """
+ Number of batches for generator.
+ """
+
+ return len(self.groups)
+
+ def __getitem__(self, index):
+ """
+ Keras sequence method for generating batches.
+ """
+ group = self.groups[index]
+ inputs, targets = self.compute_input_output(group)
+
+ return inputs, targets
diff --git a/imageai/Detection/keras_retinanet/preprocessing/kitti.py b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/kitti.py
similarity index 65%
rename from imageai/Detection/keras_retinanet/preprocessing/kitti.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/kitti.py
index 8f420074..59225582 100644
--- a/imageai/Detection/keras_retinanet/preprocessing/kitti.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/kitti.py
@@ -37,12 +37,23 @@
class KittiGenerator(Generator):
+ """ Generate data for a KITTI dataset.
+
+ See http://www.cvlibs.net/datasets/kitti/ for more information.
+ """
+
def __init__(
self,
base_dir,
subset='train',
**kwargs
):
+ """ Initialize a KITTI data generator.
+
+ Args
+ base_dir: Directory w.r.t. where the files are to be searched (defaults to the directory containing the csv_data_file).
+ subset: The subset to generate data for (defaults to 'train').
+ """
self.base_dir = base_dir
label_dir = os.path.join(self.base_dir, subset, 'labels')
@@ -65,9 +76,10 @@ def __init__(
1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi]
"""
- self.id_to_labels = {}
- for label, id in kitti_classes.items():
- self.id_to_labels[id] = label
+ self.labels = {}
+ self.classes = kitti_classes
+ for name, label in self.classes.items():
+ self.labels[label] = name
self.image_data = dict()
self.images = []
@@ -94,33 +106,63 @@ def __init__(
super(KittiGenerator, self).__init__(**kwargs)
def size(self):
+ """ Size of the dataset.
+ """
return len(self.images)
def num_classes(self):
- return max(kitti_classes.values()) + 1
+ """ Number of classes in the dataset.
+ """
+ return max(self.classes.values()) + 1
+
+ def has_label(self, label):
+ """ Return True if label is a known label.
+ """
+ return label in self.labels
+
+ def has_name(self, name):
+ """ Returns True if name is a known class.
+ """
+ return name in self.classes
def name_to_label(self, name):
+ """ Map name to label.
+ """
raise NotImplementedError()
def label_to_name(self, label):
- return self.id_to_labels[label]
+ """ Map label to name.
+ """
+ return self.labels[label]
def image_aspect_ratio(self, image_index):
+ """ Compute the aspect ratio for an image with image_index.
+ """
# PIL is fast for metadata
image = Image.open(self.images[image_index])
return float(image.width) / float(image.height)
+ def image_path(self, image_index):
+ """ Get the path to an image.
+ """
+ return self.images[image_index]
+
def load_image(self, image_index):
- return read_image_bgr(self.images[image_index])
+ """ Load an image at the image_index.
+ """
+ return read_image_bgr(self.image_path(image_index))
def load_annotations(self, image_index):
- annotations = self.image_data[image_index]
-
- boxes = np.zeros((len(annotations), 5))
- for idx, ann in enumerate(annotations):
- boxes[idx, 0] = float(ann['x1'])
- boxes[idx, 1] = float(ann['y1'])
- boxes[idx, 2] = float(ann['x2'])
- boxes[idx, 3] = float(ann['y2'])
- boxes[idx, 4] = int(ann['cls_id'])
- return boxes
+ """ Load annotations for an image_index.
+ """
+ image_data = self.image_data[image_index]
+ annotations = {'labels': np.empty((len(image_data),)), 'bboxes': np.empty((len(image_data), 4))}
+
+ for idx, ann in enumerate(image_data):
+ annotations['bboxes'][idx, 0] = float(ann['x1'])
+ annotations['bboxes'][idx, 1] = float(ann['y1'])
+ annotations['bboxes'][idx, 2] = float(ann['x2'])
+ annotations['bboxes'][idx, 3] = float(ann['y2'])
+ annotations['labels'][idx] = int(ann['cls_id'])
+
+ return annotations
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/open_images.py b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/open_images.py
new file mode 100644
index 00000000..a5ac7379
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/open_images.py
@@ -0,0 +1,375 @@
+"""
+Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import csv
+import json
+import os
+import warnings
+
+import numpy as np
+from PIL import Image
+
+from .generator import Generator
+from ..utils.image import read_image_bgr
+
+
+def load_hierarchy(metadata_dir, version='v4'):
+ hierarchy = None
+ if version == 'challenge2018':
+ hierarchy = 'bbox_labels_500_hierarchy.json'
+ elif version == 'v4':
+ hierarchy = 'bbox_labels_600_hierarchy.json'
+ elif version == 'v3':
+ hierarchy = 'bbox_labels_600_hierarchy.json'
+
+ hierarchy_json = os.path.join(metadata_dir, hierarchy)
+ with open(hierarchy_json) as f:
+ hierarchy_data = json.loads(f.read())
+
+ return hierarchy_data
+
+
+def load_hierarchy_children(hierarchy):
+ res = [hierarchy['LabelName']]
+
+ if 'Subcategory' in hierarchy:
+ for subcategory in hierarchy['Subcategory']:
+ children = load_hierarchy_children(subcategory)
+
+ for c in children:
+ res.append(c)
+
+ return res
+
+
+def find_hierarchy_parent(hierarchy, parent_cls):
+ if hierarchy['LabelName'] == parent_cls:
+ return hierarchy
+ elif 'Subcategory' in hierarchy:
+ for child in hierarchy['Subcategory']:
+ res = find_hierarchy_parent(child, parent_cls)
+ if res is not None:
+ return res
+
+ return None
+
+
+def get_labels(metadata_dir, version='v4'):
+ if version == 'v4' or version == 'challenge2018':
+ csv_file = 'class-descriptions-boxable.csv' if version == 'v4' else 'challenge-2018-class-descriptions-500.csv'
+
+ boxable_classes_descriptions = os.path.join(metadata_dir, csv_file)
+ id_to_labels = {}
+ cls_index = {}
+
+ i = 0
+ with open(boxable_classes_descriptions) as f:
+ for row in csv.reader(f):
+ # make sure the csv row is not empty (usually the last one)
+ if len(row):
+ label = row[0]
+ description = row[1].replace("\"", "").replace("'", "").replace('`', '')
+
+ id_to_labels[i] = description
+ cls_index[label] = i
+
+ i += 1
+ else:
+ trainable_classes_path = os.path.join(metadata_dir, 'classes-bbox-trainable.txt')
+ description_path = os.path.join(metadata_dir, 'class-descriptions.csv')
+
+ description_table = {}
+ with open(description_path) as f:
+ for row in csv.reader(f):
+ # make sure the csv row is not empty (usually the last one)
+ if len(row):
+ description_table[row[0]] = row[1].replace("\"", "").replace("'", "").replace('`', '')
+
+ with open(trainable_classes_path, 'rb') as f:
+ trainable_classes = f.read().split('\n')
+
+ id_to_labels = dict([(i, description_table[c]) for i, c in enumerate(trainable_classes)])
+ cls_index = dict([(c, i) for i, c in enumerate(trainable_classes)])
+
+ return id_to_labels, cls_index
+
+
+def generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version='v4'):
+ validation_image_ids = {}
+
+ if version == 'v4':
+ annotations_path = os.path.join(metadata_dir, subset, '{}-annotations-bbox.csv'.format(subset))
+ elif version == 'challenge2018':
+ validation_image_ids_path = os.path.join(metadata_dir, 'challenge-2018-image-ids-valset-od.csv')
+
+ with open(validation_image_ids_path, 'r') as csv_file:
+ reader = csv.DictReader(csv_file, fieldnames=['ImageID'])
+ next(reader)
+ for line, row in enumerate(reader):
+ image_id = row['ImageID']
+ validation_image_ids[image_id] = True
+
+ annotations_path = os.path.join(metadata_dir, 'challenge-2018-train-annotations-bbox.csv')
+ else:
+ annotations_path = os.path.join(metadata_dir, subset, 'annotations-human-bbox.csv')
+
+ fieldnames = ['ImageID', 'Source', 'LabelName', 'Confidence',
+ 'XMin', 'XMax', 'YMin', 'YMax',
+ 'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction', 'IsInside']
+
+ id_annotations = dict()
+ with open(annotations_path, 'r') as csv_file:
+ reader = csv.DictReader(csv_file, fieldnames=fieldnames)
+ next(reader)
+
+ images_sizes = {}
+ for line, row in enumerate(reader):
+ frame = row['ImageID']
+
+ if version == 'challenge2018':
+ if subset == 'train':
+ if frame in validation_image_ids:
+ continue
+ elif subset == 'validation':
+ if frame not in validation_image_ids:
+ continue
+ else:
+ raise NotImplementedError('This generator handles only the train and validation subsets')
+
+ class_name = row['LabelName']
+
+ if class_name not in cls_index:
+ continue
+
+ cls_id = cls_index[class_name]
+
+ if version == 'challenge2018':
+ # We recommend participants to use the provided subset of the training set as a validation set.
+ # This is preferable over using the V4 val/test sets, as the training set is more densely annotated.
+ img_path = os.path.join(main_dir, 'images', 'train', frame + '.jpg')
+ else:
+ img_path = os.path.join(main_dir, 'images', subset, frame + '.jpg')
+
+ if frame in images_sizes:
+ width, height = images_sizes[frame]
+ else:
+ try:
+ with Image.open(img_path) as img:
+ width, height = img.width, img.height
+ images_sizes[frame] = (width, height)
+ except Exception as ex:
+ if version == 'challenge2018':
+ raise ex
+ continue
+
+ x1 = float(row['XMin'])
+ x2 = float(row['XMax'])
+ y1 = float(row['YMin'])
+ y2 = float(row['YMax'])
+
+ x1_int = int(round(x1 * width))
+ x2_int = int(round(x2 * width))
+ y1_int = int(round(y1 * height))
+ y2_int = int(round(y2 * height))
+
+ # Check that the bounding box is valid.
+ if x2 <= x1:
+ raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
+ if y2 <= y1:
+ raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))
+
+ if y2_int == y1_int:
+ warnings.warn('filtering line {}: rounding y2 ({}) and y1 ({}) makes them equal'.format(line, y2, y1))
+ continue
+
+ if x2_int == x1_int:
+ warnings.warn('filtering line {}: rounding x2 ({}) and x1 ({}) makes them equal'.format(line, x2, x1))
+ continue
+
+ img_id = row['ImageID']
+ annotation = {'cls_id': cls_id, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2}
+
+ if img_id in id_annotations:
+ annotations = id_annotations[img_id]
+ annotations['boxes'].append(annotation)
+ else:
+ id_annotations[img_id] = {'w': width, 'h': height, 'boxes': [annotation]}
+ return id_annotations
+
+
+class OpenImagesGenerator(Generator):
+ def __init__(
+ self, main_dir, subset, version='v4',
+ labels_filter=None, annotation_cache_dir='.',
+ parent_label=None,
+ **kwargs
+ ):
+ if version == 'challenge2018':
+ metadata = 'challenge2018'
+ elif version == 'v4':
+ metadata = '2018_04'
+ elif version == 'v3':
+ metadata = '2017_11'
+ else:
+ raise NotImplementedError('There is currently no implementation for versions older than v3')
+
+ if version == 'challenge2018':
+ self.base_dir = os.path.join(main_dir, 'images', 'train')
+ else:
+ self.base_dir = os.path.join(main_dir, 'images', subset)
+
+ metadata_dir = os.path.join(main_dir, metadata)
+ annotation_cache_json = os.path.join(annotation_cache_dir, subset + '.json')
+
+ self.hierarchy = load_hierarchy(metadata_dir, version=version)
+ id_to_labels, cls_index = get_labels(metadata_dir, version=version)
+
+ if os.path.exists(annotation_cache_json):
+ with open(annotation_cache_json, 'r') as f:
+ self.annotations = json.loads(f.read())
+ else:
+ self.annotations = generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version=version)
+ json.dump(self.annotations, open(annotation_cache_json, "w"))
+
+ if labels_filter is not None or parent_label is not None:
+ self.id_to_labels, self.annotations = self.__filter_data(id_to_labels, cls_index, labels_filter, parent_label)
+ else:
+ self.id_to_labels = id_to_labels
+
+ self.id_to_image_id = dict([(i, k) for i, k in enumerate(self.annotations)])
+
+ super(OpenImagesGenerator, self).__init__(**kwargs)
+
+ def __filter_data(self, id_to_labels, cls_index, labels_filter=None, parent_label=None):
+ """
+ If you want to work with a subset of the labels just set a list with trainable labels
+ :param labels_filter: Ex: labels_filter = ['Helmet', 'Hat', 'Analog television']
+ :param parent_label: If parent_label is set this will bring you the parent label
+ but also its children in the semantic hierarchy as defined in OID, ex: Animal
+ hierarchical tree
+ :return:
+ """
+
+ children_id_to_labels = {}
+
+ if parent_label is None:
+ # no sublabels to include other than the given labels themselves
+
+ for label in labels_filter:
+ for i, lb in id_to_labels.items():
+ if lb == label:
+ children_id_to_labels[i] = label
+ break
+ else:
+ parent_cls = None
+ for i, lb in id_to_labels.items():
+ if lb == parent_label:
+ parent_id = i
+ for c, index in cls_index.items():
+ if index == parent_id:
+ parent_cls = c
+ break
+
+ if parent_cls is None:
+ raise Exception('Couldnt find label {}'.format(parent_label))
+
+ parent_tree = find_hierarchy_parent(self.hierarchy, parent_cls)
+
+ if parent_tree is None:
+ raise Exception('Couldnt find parent {} in the semantic hierarchical tree'.format(parent_label))
+
+ children = load_hierarchy_children(parent_tree)
+
+ for cls in children:
+ index = cls_index[cls]
+ label = id_to_labels[index]
+ children_id_to_labels[index] = label
+
+ id_map = dict([(ind, i) for i, ind in enumerate(children_id_to_labels.keys())])
+
+ filtered_annotations = {}
+ for k in self.annotations:
+ img_ann = self.annotations[k]
+
+ filtered_boxes = []
+ for ann in img_ann['boxes']:
+ cls_id = ann['cls_id']
+ if cls_id in children_id_to_labels:
+ ann['cls_id'] = id_map[cls_id]
+ filtered_boxes.append(ann)
+
+ if len(filtered_boxes) > 0:
+ filtered_annotations[k] = {'w': img_ann['w'], 'h': img_ann['h'], 'boxes': filtered_boxes}
+
+ children_id_to_labels = dict([(id_map[i], l) for (i, l) in children_id_to_labels.items()])
+
+ return children_id_to_labels, filtered_annotations
+
+ def size(self):
+ return len(self.annotations)
+
+ def num_classes(self):
+ return len(self.id_to_labels)
+
+ def has_label(self, label):
+ """ Return True if label is a known label.
+ """
+ return label in self.id_to_labels
+
+ def has_name(self, name):
+ """ Returns True if name is a known class.
+ """
+ raise NotImplementedError()
+
+ def name_to_label(self, name):
+ raise NotImplementedError()
+
+ def label_to_name(self, label):
+ return self.id_to_labels[label]
+
+ def image_aspect_ratio(self, image_index):
+ img_annotations = self.annotations[self.id_to_image_id[image_index]]
+ height, width = img_annotations['h'], img_annotations['w']
+ return float(width) / float(height)
+
+ def image_path(self, image_index):
+ path = os.path.join(self.base_dir, self.id_to_image_id[image_index] + '.jpg')
+ return path
+
+ def load_image(self, image_index):
+ return read_image_bgr(self.image_path(image_index))
+
+ def load_annotations(self, image_index):
+ image_annotations = self.annotations[self.id_to_image_id[image_index]]
+
+ labels = image_annotations['boxes']
+ height, width = image_annotations['h'], image_annotations['w']
+
+ annotations = {'labels': np.empty((len(labels),)), 'bboxes': np.empty((len(labels), 4))}
+ for idx, ann in enumerate(labels):
+ cls_id = ann['cls_id']
+ x1 = ann['x1'] * width
+ x2 = ann['x2'] * width
+ y1 = ann['y1'] * height
+ y2 = ann['y2'] * height
+
+ annotations['bboxes'][idx, 0] = x1
+ annotations['bboxes'][idx, 1] = y1
+ annotations['bboxes'][idx, 2] = x2
+ annotations['bboxes'][idx, 3] = y2
+ annotations['labels'][idx] = cls_id
+
+ return annotations
diff --git a/imageai/Detection/keras_retinanet/preprocessing/pascal_voc.py b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/pascal_voc.py
similarity index 65%
rename from imageai/Detection/keras_retinanet/preprocessing/pascal_voc.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/pascal_voc.py
index e273aacb..3428fa8b 100644
--- a/imageai/Detection/keras_retinanet/preprocessing/pascal_voc.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/pascal_voc.py
@@ -51,7 +51,7 @@
}
-def _findNode(parent, name, debug_name = None, parse = None):
+def _findNode(parent, name, debug_name=None, parse=None):
if debug_name is None:
debug_name = name
@@ -67,6 +67,11 @@ def _findNode(parent, name, debug_name = None, parse = None):
class PascalVocGenerator(Generator):
+ """ Generate data for a Pascal VOC dataset.
+
+ See http://host.robots.ox.ac.uk/pascal/VOC/ for more information.
+ """
+
def __init__(
self,
data_dir,
@@ -77,10 +82,16 @@ def __init__(
skip_difficult=False,
**kwargs
):
+ """ Initialize a Pascal VOC data generator.
+
+ Args
+ data_dir: Path to the root directory of the Pascal VOC dataset.
+ set_name: Name of the image set to load (e.g. 'trainval', 'test').
+ """
self.data_dir = data_dir
self.set_name = set_name
self.classes = classes
- self.image_names = [l.strip().split(None, 1)[0] for l in open(os.path.join(data_dir, 'ImageSets', 'Main', set_name + '.txt')).readlines()]
+ self.image_names = [line.strip().split(None, 1)[0] for line in open(os.path.join(data_dir, 'ImageSets', 'Main', set_name + '.txt')).readlines()]
self.image_extension = image_extension
self.skip_truncated = skip_truncated
self.skip_difficult = skip_difficult
@@ -92,27 +103,55 @@ def __init__(
super(PascalVocGenerator, self).__init__(**kwargs)
def size(self):
+ """ Size of the dataset.
+ """
return len(self.image_names)
def num_classes(self):
+ """ Number of classes in the dataset.
+ """
return len(self.classes)
+ def has_label(self, label):
+ """ Return True if label is a known label.
+ """
+ return label in self.labels
+
+ def has_name(self, name):
+ """ Returns True if name is a known class.
+ """
+ return name in self.classes
+
def name_to_label(self, name):
+ """ Map name to label.
+ """
return self.classes[name]
def label_to_name(self, label):
+ """ Map label to name.
+ """
return self.labels[label]
def image_aspect_ratio(self, image_index):
+ """ Compute the aspect ratio for an image with image_index.
+ """
path = os.path.join(self.data_dir, 'JPEGImages', self.image_names[image_index] + self.image_extension)
image = Image.open(path)
return float(image.width) / float(image.height)
+ def image_path(self, image_index):
+ """ Get the path to an image.
+ """
+ return os.path.join(self.data_dir, 'JPEGImages', self.image_names[image_index] + self.image_extension)
+
def load_image(self, image_index):
- path = os.path.join(self.data_dir, 'JPEGImages', self.image_names[image_index] + self.image_extension)
- return read_image_bgr(path)
+ """ Load an image at the image_index.
+ """
+ return read_image_bgr(self.image_path(image_index))
def __parse_annotation(self, element):
+ """ Parse an annotation given an XML element.
+ """
truncated = _findNode(element, 'truncated', parse=int)
difficult = _findNode(element, 'difficult', parse=int)
@@ -120,26 +159,24 @@ def __parse_annotation(self, element):
if class_name not in self.classes:
raise ValueError('class name \'{}\' not found in classes: {}'.format(class_name, list(self.classes.keys())))
- box = np.zeros((1, 5))
- box[0, 4] = self.name_to_label(class_name)
+ box = np.zeros((4,))
+ label = self.name_to_label(class_name)
bndbox = _findNode(element, 'bndbox')
- box[0, 0] = _findNode(bndbox, 'xmin', 'bndbox.xmin', parse=float) - 1
- box[0, 1] = _findNode(bndbox, 'ymin', 'bndbox.ymin', parse=float) - 1
- box[0, 2] = _findNode(bndbox, 'xmax', 'bndbox.xmax', parse=float) - 1
- box[0, 3] = _findNode(bndbox, 'ymax', 'bndbox.ymax', parse=float) - 1
+ box[0] = _findNode(bndbox, 'xmin', 'bndbox.xmin', parse=float) - 1
+ box[1] = _findNode(bndbox, 'ymin', 'bndbox.ymin', parse=float) - 1
+ box[2] = _findNode(bndbox, 'xmax', 'bndbox.xmax', parse=float) - 1
+ box[3] = _findNode(bndbox, 'ymax', 'bndbox.ymax', parse=float) - 1
- return truncated, difficult, box
+ return truncated, difficult, box, label
def __parse_annotations(self, xml_root):
- size_node = _findNode(xml_root, 'size')
- width = _findNode(size_node, 'width', 'size.width', parse=float)
- height = _findNode(size_node, 'height', 'size.height', parse=float)
-
- boxes = np.zeros((0, 5))
+ """ Parse all annotations under the xml_root.
+ """
+ annotations = {'labels': np.empty((len(xml_root.findall('object')),)), 'bboxes': np.empty((len(xml_root.findall('object')), 4))}
for i, element in enumerate(xml_root.iter('object')):
try:
- truncated, difficult, box = self.__parse_annotation(element)
+ truncated, difficult, box, label = self.__parse_annotation(element)
except ValueError as e:
raise_from(ValueError('could not parse object #{}: {}'.format(i, e)), None)
@@ -147,11 +184,15 @@ def __parse_annotations(self, xml_root):
continue
if difficult and self.skip_difficult:
continue
- boxes = np.append(boxes, box, axis=0)
- return boxes
+ annotations['bboxes'][i, :] = box
+ annotations['labels'][i] = label
+
+ return annotations
def load_annotations(self, image_index):
+ """ Load annotations for an image_index.
+ """
filename = self.image_names[image_index] + '.xml'
try:
tree = ET.parse(os.path.join(self.data_dir, 'Annotations', filename))
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/utils/__init__.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/utils/anchors.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/anchors.py
new file mode 100644
index 00000000..12257d32
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/anchors.py
@@ -0,0 +1,318 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from tensorflow import keras
+
+#from ..utils.compute_overlap import compute_overlap
+
+
+class AnchorParameters:
+ """ The parameters that define how anchors are generated.
+
+ Args
+ sizes : List of sizes to use. Each size corresponds to one feature level.
+ strides : List of strides to use. Each stride corresponds to one feature level.
+ ratios : List of ratios to use per location in a feature map.
+ scales : List of scales to use per location in a feature map.
+ """
+ def __init__(self, sizes, strides, ratios, scales):
+ self.sizes = sizes
+ self.strides = strides
+ self.ratios = ratios
+ self.scales = scales
+
+ def num_anchors(self):
+ return len(self.ratios) * len(self.scales)
+
+
+"""
+The default anchor parameters.
+"""
+AnchorParameters.default = AnchorParameters(
+ sizes = [32, 64, 128, 256, 512],
+ strides = [8, 16, 32, 64, 128],
+ ratios = np.array([0.5, 1, 2], keras.backend.floatx()),
+ scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)], keras.backend.floatx()),
+)
+
+
+def anchor_targets_bbox(
+ anchors,
+ image_group,
+ annotations_group,
+ num_classes,
+ negative_overlap=0.4,
+ positive_overlap=0.5
+):
+ """ Generate anchor targets for bbox detection.
+
+ Args
+ anchors: np.array of annotations of shape (N, 4) for (x1, y1, x2, y2).
+ image_group: List of BGR images.
+ annotations_group: List of annotation dictionaries with each annotation containing 'labels' and 'bboxes' of an image.
+ num_classes: Number of classes to predict.
+ mask_shape: If the image is padded with zeros, mask_shape can be used to mark the relevant part of the image.
+ negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative).
+ positive_overlap: IoU overlap for positive anchors (all anchors with overlap > positive_overlap are positive).
+
+ Returns
+ labels_batch: batch that contains labels & anchor states (np.array of shape (batch_size, N, num_classes + 1),
+ where N is the number of anchors for an image and the last column defines the anchor state (-1 for ignore, 0 for bg, 1 for fg).
+ regression_batch: batch that contains bounding-box regression targets for an image & anchor states (np.array of shape (batch_size, N, 4 + 1),
+ where N is the number of anchors for an image, the first 4 columns define regression targets for (x1, y1, x2, y2) and the
+ last column defines anchor states (-1 for ignore, 0 for bg, 1 for fg).
+ """
+
+ assert(len(image_group) == len(annotations_group)), "The length of the images and annotations need to be equal."
+ assert(len(annotations_group) > 0), "No data received to compute anchor targets for."
+ for annotations in annotations_group:
+ assert('bboxes' in annotations), "Annotations should contain bboxes."
+ assert('labels' in annotations), "Annotations should contain labels."
+
+ batch_size = len(image_group)
+
+ regression_batch = np.zeros((batch_size, anchors.shape[0], 4 + 1), dtype=keras.backend.floatx())
+ labels_batch = np.zeros((batch_size, anchors.shape[0], num_classes + 1), dtype=keras.backend.floatx())
+
+ # compute labels and regression targets
+ for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
+ if annotations['bboxes'].shape[0]:
+ # obtain indices of gt annotations with the greatest overlap
+ positive_indices, ignore_indices, argmax_overlaps_inds = compute_gt_annotations(anchors, annotations['bboxes'], negative_overlap, positive_overlap)
+
+ labels_batch[index, ignore_indices, -1] = -1
+ labels_batch[index, positive_indices, -1] = 1
+
+ regression_batch[index, ignore_indices, -1] = -1
+ regression_batch[index, positive_indices, -1] = 1
+
+ # compute target class labels
+ labels_batch[index, positive_indices, annotations['labels'][argmax_overlaps_inds[positive_indices]].astype(int)] = 1
+
+ regression_batch[index, :, :-1] = bbox_transform(anchors, annotations['bboxes'][argmax_overlaps_inds, :])
+
+ # ignore annotations outside of image
+ if image.shape:
+ anchors_centers = np.vstack([(anchors[:, 0] + anchors[:, 2]) / 2, (anchors[:, 1] + anchors[:, 3]) / 2]).T
+ indices = np.logical_or(anchors_centers[:, 0] >= image.shape[1], anchors_centers[:, 1] >= image.shape[0])
+
+ labels_batch[index, indices, -1] = -1
+ regression_batch[index, indices, -1] = -1
+
+ return regression_batch, labels_batch
+
+
+def layer_shapes(image_shape, model):
+ """Compute layer shapes given input image shape and the model.
+
+ Args
+ image_shape: The shape of the image.
+ model: The model to use for computing how the image shape is transformed in the pyramid.
+
+ Returns
+ A dictionary mapping layer names to image shapes.
+ """
+ shape = {
+ model.layers[0].name: (None,) + image_shape,
+ }
+
+ for layer in model.layers[1:]:
+ nodes = layer._inbound_nodes
+ for node in nodes:
+ if isinstance(node.inbound_layers, keras.layers.Layer):
+ inputs = [shape[node.inbound_layers.name]]
+ else:
+ inputs = [shape[lr.name] for lr in node.inbound_layers]
+ if not inputs:
+ continue
+ shape[layer.name] = layer.compute_output_shape(inputs[0] if len(inputs) == 1 else inputs)
+
+ return shape
+
+
+def make_shapes_callback(model):
+ """ Make a function for getting the shape of the pyramid levels.
+ """
+ def get_shapes(image_shape, pyramid_levels):
+ shape = layer_shapes(image_shape, model)
+ image_shapes = [shape["P{}".format(level)][1:3] for level in pyramid_levels]
+ return image_shapes
+
+ return get_shapes
+
+
+def guess_shapes(image_shape, pyramid_levels):
+ """Guess shapes based on pyramid levels.
+
+ Args
+ image_shape: The shape of the image.
+ pyramid_levels: A list of what pyramid levels are used.
+
+ Returns
+ A list of image shapes at each pyramid level.
+ """
+ image_shape = np.array(image_shape[:2])
+ image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels]
+ return image_shapes
+
+
+def anchors_for_shape(
+ image_shape,
+ pyramid_levels=None,
+ anchor_params=None,
+ shapes_callback=None,
+):
+ """ Generate anchors for a given shape.
+
+ Args
+ image_shape: The shape of the image.
+ pyramid_levels: List of ints representing which pyramids to use (defaults to [3, 4, 5, 6, 7]).
+ anchor_params: Struct containing anchor parameters. If None, default values are used.
+ shapes_callback: Function to call for getting the shape of the image at different pyramid levels.
+
+ Returns
+ np.array of shape (N, 4) containing the (x1, y1, x2, y2) coordinates for the anchors.
+ """
+
+ if pyramid_levels is None:
+ pyramid_levels = [3, 4, 5, 6, 7]
+
+ if anchor_params is None:
+ anchor_params = AnchorParameters.default
+
+ if shapes_callback is None:
+ shapes_callback = guess_shapes
+ image_shapes = shapes_callback(image_shape, pyramid_levels)
+
+ # compute anchors over all pyramid levels
+ all_anchors = np.zeros((0, 4))
+ for idx, p in enumerate(pyramid_levels):
+ anchors = generate_anchors(
+ base_size=anchor_params.sizes[idx],
+ ratios=anchor_params.ratios,
+ scales=anchor_params.scales
+ )
+ shifted_anchors = shift(image_shapes[idx], anchor_params.strides[idx], anchors)
+ all_anchors = np.append(all_anchors, shifted_anchors, axis=0)
+
+ return all_anchors
+
+
+def shift(shape, stride, anchors):
+ """ Produce shifted anchors based on shape of the map and stride size.
+
+ Args
+ shape : Shape to shift the anchors over.
+ stride : Stride to shift the anchors with over the shape.
+ anchors: The anchors to apply at each location.
+ """
+
+ # create a grid starting from half stride from the top left corner
+ shift_x = (np.arange(0, shape[1]) + 0.5) * stride
+ shift_y = (np.arange(0, shape[0]) + 0.5) * stride
+
+ shift_x, shift_y = np.meshgrid(shift_x, shift_y)
+
+ shifts = np.vstack((
+ shift_x.ravel(), shift_y.ravel(),
+ shift_x.ravel(), shift_y.ravel()
+ )).transpose()
+
+ # add A anchors (1, A, 4) to
+ # cell K shifts (K, 1, 4) to get
+ # shift anchors (K, A, 4)
+ # reshape to (K*A, 4) shifted anchors
+ A = anchors.shape[0]
+ K = shifts.shape[0]
+ all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
+ all_anchors = all_anchors.reshape((K * A, 4))
+
+ return all_anchors
+
+
+def generate_anchors(base_size=16, ratios=None, scales=None):
+ """
+ Generate anchor (reference) windows by enumerating aspect ratios X
+ scales w.r.t. a reference window.
+ """
+
+ if ratios is None:
+ ratios = AnchorParameters.default.ratios
+
+ if scales is None:
+ scales = AnchorParameters.default.scales
+
+ num_anchors = len(ratios) * len(scales)
+
+ # initialize output anchors
+ anchors = np.zeros((num_anchors, 4))
+
+ # scale base_size
+ anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T
+
+ # compute areas of anchors
+ areas = anchors[:, 2] * anchors[:, 3]
+
+ # correct for ratios
+ anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales)))
+ anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales))
+
+ # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
+ anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
+ anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T
+
+ return anchors
+
+
+def bbox_transform(anchors, gt_boxes, mean=None, std=None):
+ """Compute bounding-box regression targets for an image."""
+
+ # The mean and std are calculated from the COCO dataset.
+ # Bounding box normalization was firstly introduced in the Fast R-CNN paper.
+ # See https://github.com/fizyr/keras-retinanet/issues/1273#issuecomment-585828825 for more details
+ if mean is None:
+ mean = np.array([0, 0, 0, 0])
+ if std is None:
+ std = np.array([0.2, 0.2, 0.2, 0.2])
+
+ if isinstance(mean, (list, tuple)):
+ mean = np.array(mean)
+ elif not isinstance(mean, np.ndarray):
+ raise ValueError('Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean)))
+
+ if isinstance(std, (list, tuple)):
+ std = np.array(std)
+ elif not isinstance(std, np.ndarray):
+ raise ValueError('Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std)))
+
+ anchor_widths = anchors[:, 2] - anchors[:, 0]
+ anchor_heights = anchors[:, 3] - anchors[:, 1]
+
+ # According to the information provided by a keras-retinanet author, they got marginally better results using
+ # the following way of bounding box parametrization.
+ # See https://github.com/fizyr/keras-retinanet/issues/1273#issuecomment-585828825 for more details
+ targets_dx1 = (gt_boxes[:, 0] - anchors[:, 0]) / anchor_widths
+ targets_dy1 = (gt_boxes[:, 1] - anchors[:, 1]) / anchor_heights
+ targets_dx2 = (gt_boxes[:, 2] - anchors[:, 2]) / anchor_widths
+ targets_dy2 = (gt_boxes[:, 3] - anchors[:, 3]) / anchor_heights
+
+ targets = np.stack((targets_dx1, targets_dy1, targets_dx2, targets_dy2))
+ targets = targets.T
+
+ targets = (targets - mean) / std
+
+ return targets
diff --git a/imageai/Detection/keras_retinanet/utils/coco_eval.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/coco_eval.py
similarity index 61%
rename from imageai/Detection/keras_retinanet/utils/coco_eval.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/utils/coco_eval.py
index 72b3062c..7ad020e9 100644
--- a/imageai/Detection/keras_retinanet/utils/coco_eval.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/coco_eval.py
@@ -14,49 +14,57 @@
limitations under the License.
"""
-from __future__ import print_function
-
-from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
+from tensorflow import keras
import numpy as np
import json
-import os
+
+import progressbar
+assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead."
def evaluate_coco(generator, model, threshold=0.05):
+ """ Use the pycocotools to evaluate a COCO model on a dataset.
+
+ Args
+ generator : The generator for generating the evaluation data.
+ model : The model to evaluate.
+ threshold : The score threshold to use.
+ """
# start collecting results
results = []
image_ids = []
- for index in range(generator.size()):
+ for index in progressbar.progressbar(range(generator.size()), prefix='COCO evaluation: '):
image = generator.load_image(index)
image = generator.preprocess_image(image)
image, scale = generator.resize_image(image)
- # run network
- _, _, detections = model.predict_on_batch(np.expand_dims(image, axis=0))
+ if keras.backend.image_data_format() == 'channels_first':
+ image = image.transpose((2, 0, 1))
- # clip to image shape
- detections[:, :, 0] = np.maximum(0, detections[:, :, 0])
- detections[:, :, 1] = np.maximum(0, detections[:, :, 1])
- detections[:, :, 2] = np.minimum(image.shape[1], detections[:, :, 2])
- detections[:, :, 3] = np.minimum(image.shape[0], detections[:, :, 3])
+ # run network
+ boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
# correct boxes for image scale
- detections[0, :, :4] /= scale
+ boxes /= scale
# change to (x, y, w, h) (MS COCO standard)
- detections[:, :, 2] -= detections[:, :, 0]
- detections[:, :, 3] -= detections[:, :, 1]
+ boxes[:, :, 2] -= boxes[:, :, 0]
+ boxes[:, :, 3] -= boxes[:, :, 1]
# compute predicted labels and scores
- for i, j in np.transpose(np.where(detections[0, :, 4:] > threshold)):
- # append detections for each positively labeled class
+ for box, score, label in zip(boxes[0], scores[0], labels[0]):
+ # scores are sorted, so we can break
+ if score < threshold:
+ break
+
+ # append detection for each positively labeled class
image_result = {
'image_id' : generator.image_ids[index],
- 'category_id' : generator.label_to_coco_label(j),
- 'score' : float(detections[0, i, 4 + j]),
- 'bbox' : (detections[0, i, :4]).tolist(),
+ 'category_id' : generator.label_to_coco_label(label),
+ 'score' : float(score),
+ 'bbox' : box.tolist(),
}
# append detection to results
@@ -65,9 +73,6 @@ def evaluate_coco(generator, model, threshold=0.05):
# append image to list of processed images
image_ids.append(generator.image_ids[index])
- # print progress
- print('{}/{}'.format(index, generator.size()), end='\r')
-
if not len(results):
return
@@ -85,3 +90,4 @@ def evaluate_coco(generator, model, threshold=0.05):
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
+ return coco_eval.stats
diff --git a/imageai/Detection/keras_retinanet/utils/colors.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/colors.py
similarity index 99%
rename from imageai/Detection/keras_retinanet/utils/colors.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/utils/colors.py
index e3674ac2..7f1b6850 100644
--- a/imageai/Detection/keras_retinanet/utils/colors.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/colors.py
@@ -18,6 +18,7 @@ def label_color(label):
warnings.warn('Label {} has no color, returning default.'.format(label))
return (0, 255, 0)
+
"""
Generated using:
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/utils/compute_overlap.pyx b/imageai_tf_deprecated/Detection/keras_retinanet/utils/compute_overlap.pyx
new file mode 100644
index 00000000..e8b79301
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/compute_overlap.pyx
@@ -0,0 +1,53 @@
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Sergey Karayev
+# --------------------------------------------------------
+
+cimport cython
+import numpy as np
+cimport numpy as np
+
+
+def compute_overlap(
+ np.ndarray[double, ndim=2] boxes,
+ np.ndarray[double, ndim=2] query_boxes
+):
+ """
+ Args
+ a: (N, 4) ndarray of float
+ b: (K, 4) ndarray of float
+
+ Returns
+ overlaps: (N, K) ndarray of overlap between boxes and query_boxes
+ """
+ cdef unsigned int N = boxes.shape[0]
+ cdef unsigned int K = query_boxes.shape[0]
+ cdef np.ndarray[double, ndim=2] overlaps = np.zeros((N, K), dtype=np.float64)
+ cdef double iw, ih, box_area
+ cdef double ua
+ cdef unsigned int k, n
+ for k in range(K):
+ box_area = (
+ (query_boxes[k, 2] - query_boxes[k, 0]) *
+ (query_boxes[k, 3] - query_boxes[k, 1])
+ )
+ for n in range(N):
+ iw = (
+ min(boxes[n, 2], query_boxes[k, 2]) -
+ max(boxes[n, 0], query_boxes[k, 0])
+ )
+ if iw > 0:
+ ih = (
+ min(boxes[n, 3], query_boxes[k, 3]) -
+ max(boxes[n, 1], query_boxes[k, 1])
+ )
+ if ih > 0:
+ ua = np.float64(
+ (boxes[n, 2] - boxes[n, 0]) *
+ (boxes[n, 3] - boxes[n, 1]) +
+ box_area - iw * ih
+ )
+ overlaps[n, k] = iw * ih / ua
+ return overlaps
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/utils/config.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/config.py
new file mode 100644
index 00000000..58de9228
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/config.py
@@ -0,0 +1,57 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import configparser
+import numpy as np
+from tensorflow import keras
+from ..utils.anchors import AnchorParameters
+
+
+def read_config_file(config_path):
+ config = configparser.ConfigParser()
+
+ with open(config_path, 'r') as file:
+ config.read_file(file)
+
+ assert 'anchor_parameters' in config, \
+ "Malformed config file. Verify that it contains the anchor_parameters section."
+
+ config_keys = set(config['anchor_parameters'])
+ default_keys = set(AnchorParameters.default.__dict__.keys())
+
+ assert config_keys <= default_keys, \
+ "Malformed config file. These keys are not valid: {}".format(config_keys - default_keys)
+
+ if 'pyramid_levels' in config:
+ assert('levels' in config['pyramid_levels']), "pyramid levels specified by levels key"
+
+ return config
+
+
+def parse_anchor_parameters(config):
+ ratios = np.array(list(map(float, config['anchor_parameters']['ratios'].split(' '))), keras.backend.floatx())
+ scales = np.array(list(map(float, config['anchor_parameters']['scales'].split(' '))), keras.backend.floatx())
+ sizes = list(map(int, config['anchor_parameters']['sizes'].split(' ')))
+ strides = list(map(int, config['anchor_parameters']['strides'].split(' ')))
+ assert (len(sizes) == len(strides)), "sizes and strides should have an equal number of values"
+
+ return AnchorParameters(sizes, strides, ratios, scales)
+
+
+def parse_pyramid_levels(config):
+ levels = list(map(int, config['pyramid_levels']['levels'].split(' ')))
+
+ return levels
diff --git a/imageai/Detection/keras_retinanet/utils/eval.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/eval.py
similarity index 75%
rename from imageai/Detection/keras_retinanet/utils/eval.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/utils/eval.py
index 88637e2e..f6b723fc 100644
--- a/imageai/Detection/keras_retinanet/utils/eval.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/eval.py
@@ -14,16 +14,17 @@
limitations under the License.
"""
-from __future__ import print_function
-
from .anchors import compute_overlap
from .visualization import draw_detections, draw_annotations
+from tensorflow import keras
import numpy as np
import os
+import time
import cv2
-import pickle
+import progressbar
+assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead."
def _compute_ap(recall, precision):
@@ -70,56 +71,56 @@ def _get_detections(generator, model, score_threshold=0.05, max_detections=100,
# Returns
A list of lists containing the detections for each image in the generator.
"""
- all_detections = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
+ all_detections = [[None for i in range(generator.num_classes()) if generator.has_label(i)] for j in range(generator.size())]
+ all_inferences = [None for i in range(generator.size())]
- for i in range(generator.size()):
+ for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
raw_image = generator.load_image(i)
- image = generator.preprocess_image(raw_image.copy())
- image, scale = generator.resize_image(image)
+ image, scale = generator.resize_image(raw_image.copy())
+ image = generator.preprocess_image(image)
- # run network
- _, _, detections = model.predict_on_batch(np.expand_dims(image, axis=0))
+ if keras.backend.image_data_format() == 'channels_first':
+ image = image.transpose((2, 0, 1))
- # clip to image shape
- detections[:, :, 0] = np.maximum(0, detections[:, :, 0])
- detections[:, :, 1] = np.maximum(0, detections[:, :, 1])
- detections[:, :, 2] = np.minimum(image.shape[1], detections[:, :, 2])
- detections[:, :, 3] = np.minimum(image.shape[0], detections[:, :, 3])
+ # run network
+ start = time.time()
+ boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))[:3]
+ inference_time = time.time() - start
# correct boxes for image scale
- detections[0, :, :4] /= scale
-
- # select scores from detections
- scores = detections[0, :, 4:]
+ boxes /= scale
# select indices which have a score above the threshold
- indices = np.where(detections[0, :, 4:] > score_threshold)
+ indices = np.where(scores[0, :] > score_threshold)[0]
# select those scores
- scores = scores[indices]
+ scores = scores[0][indices]
# find the order with which to sort the scores
scores_sort = np.argsort(-scores)[:max_detections]
# select detections
- image_boxes = detections[0, indices[0][scores_sort], :4]
- image_scores = np.expand_dims(detections[0, indices[0][scores_sort], 4 + indices[1][scores_sort]], axis=1)
- image_detections = np.append(image_boxes, image_scores, axis=1)
- image_predicted_labels = indices[1][scores_sort]
+ image_boxes = boxes[0, indices[scores_sort], :]
+ image_scores = scores[scores_sort]
+ image_labels = labels[0, indices[scores_sort]]
+ image_detections = np.concatenate([image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)
if save_path is not None:
- draw_annotations(raw_image, generator.load_annotations(i), generator=generator)
- draw_detections(raw_image, detections[0, indices[0][scores_sort], :], generator=generator)
+ draw_annotations(raw_image, generator.load_annotations(i), label_to_name=generator.label_to_name)
+ draw_detections(raw_image, image_boxes, image_scores, image_labels, label_to_name=generator.label_to_name, score_threshold=score_threshold)
cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)), raw_image)
# copy detections to all_detections
for label in range(generator.num_classes()):
- all_detections[i][label] = image_detections[image_predicted_labels == label, :]
+ if not generator.has_label(label):
+ continue
- print('{}/{}'.format(i, generator.size()), end='\r')
+ all_detections[i][label] = image_detections[image_detections[:, -1] == label, :-1]
- return all_detections
+ all_inferences[i] = inference_time
+
+ return all_detections, all_inferences
def _get_annotations(generator):
@@ -135,15 +136,16 @@ def _get_annotations(generator):
"""
all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
- for i in range(generator.size()):
+ for i in progressbar.progressbar(range(generator.size()), prefix='Parsing annotations: '):
# load the annotations
annotations = generator.load_annotations(i)
# copy detections to all_annotations
for label in range(generator.num_classes()):
- all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()
+ if not generator.has_label(label):
+ continue
- print('{}/{}'.format(i, generator.size()), end='\r')
+ all_annotations[i][label] = annotations['bboxes'][annotations['labels'] == label, :].copy()
return all_annotations
@@ -169,7 +171,7 @@ def evaluate(
A dict mapping class names to mAP scores.
"""
# gather all detections and annotations
- all_detections = _get_detections(generator, model, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path)
+ all_detections, all_inferences = _get_detections(generator, model, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path)
all_annotations = _get_annotations(generator)
average_precisions = {}
@@ -180,6 +182,9 @@ def evaluate(
# process detections and annotations
for label in range(generator.num_classes()):
+ if not generator.has_label(label):
+ continue
+
false_positives = np.zeros((0,))
true_positives = np.zeros((0,))
scores = np.zeros((0,))
@@ -213,7 +218,7 @@ def evaluate(
# no annotations -> AP for this class is 0 (is this correct?)
if num_annotations == 0:
- average_precisions[label] = 0
+ average_precisions[label] = 0, 0
continue
# sort by score
@@ -231,6 +236,9 @@ def evaluate(
# compute average precision
average_precision = _compute_ap(recall, precision)
- average_precisions[label] = average_precision
+ average_precisions[label] = average_precision, num_annotations
+
+ # inference time
+ inference_time = np.sum(all_inferences) / generator.size()
- return average_precisions
+ return average_precisions, inference_time
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/utils/gpu.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/gpu.py
new file mode 100644
index 00000000..067f30b4
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/gpu.py
@@ -0,0 +1,43 @@
+"""
+Copyright 2017-2019 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import tensorflow as tf
+
+
+def setup_gpu(gpu_id):
+ try:
+ visible_gpu_indices = [int(id) for id in gpu_id.split(',')]
+ available_gpus = tf.config.list_physical_devices('GPU')
+ visible_gpus = [gpu for idx, gpu in enumerate(available_gpus) if idx in visible_gpu_indices]
+
+ if visible_gpus:
+ try:
+ # Currently, memory growth needs to be the same across GPUs.
+ for gpu in available_gpus:
+ tf.config.experimental.set_memory_growth(gpu, True)
+
+ # Use only the selcted gpu.
+ tf.config.set_visible_devices(visible_gpus, 'GPU')
+ except RuntimeError as e:
+ # Visible devices must be set before GPUs have been initialized.
+ print(e)
+
+ logical_gpus = tf.config.list_logical_devices('GPU')
+ print(len(available_gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
+ else:
+ tf.config.set_visible_devices([], 'GPU')
+ except ValueError:
+ tf.config.set_visible_devices([], 'GPU')
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/utils/image.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/image.py
new file mode 100644
index 00000000..b3116cd9
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/image.py
@@ -0,0 +1,356 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from __future__ import division
+import numpy as np
+import cv2
+from PIL import Image
+
+from .transform import change_transform_origin
+
+
+def read_image_bgr(path):
+ """ Read an image in BGR format.
+
+ Args
+ path: Path to the image.
+ """
+ # We deliberately don't use cv2.imread here, since it gives no feedback on errors while reading the image.
+ image = np.ascontiguousarray(Image.open(path).convert('RGB'))
+ return image[:, :, ::-1]
+
+
+def preprocess_image(x, mode='caffe'):
+ """ Preprocess an image by subtracting the ImageNet mean.
+
+ Args
+ x: np.array of shape (None, None, 3) or (3, None, None).
+ mode: One of "caffe" or "tf".
+ - caffe: will zero-center each color channel with
+ respect to the ImageNet dataset, without scaling.
+ - tf: will scale pixels between -1 and 1, sample-wise.
+
+ Returns
+ The input with the ImageNet mean subtracted.
+ """
+ # mostly identical to "https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py"
+ # except for converting RGB -> BGR since we assume BGR already
+
+ # covert always to float32 to keep compatibility with opencv
+ x = x.astype(np.float32)
+
+ if mode == 'tf':
+ x /= 127.5
+ x -= 1.
+ elif mode == 'caffe':
+ x -= [103.939, 116.779, 123.68]
+
+ return x
+
+
+def adjust_transform_for_image(transform, image, relative_translation):
+ """ Adjust a transformation for a specific image.
+
+ The translation of the matrix will be scaled with the size of the image.
+ The linear part of the transformation will adjusted so that the origin of the transformation will be at the center of the image.
+ """
+ height, width, channels = image.shape
+
+ result = transform
+
+ # Scale the translation with the image size if specified.
+ if relative_translation:
+ result[0:2, 2] *= [width, height]
+
+ # Move the origin of transformation.
+ result = change_transform_origin(transform, (0.5 * width, 0.5 * height))
+
+ return result
+
+
+class TransformParameters:
+ """ Struct holding parameters determining how to apply a transformation to an image.
+
+ Args
+ fill_mode: One of: 'constant', 'nearest', 'reflect', 'wrap'
+ interpolation: One of: 'nearest', 'linear', 'cubic', 'area', 'lanczos4'
+ cval: Fill value to use with fill_mode='constant'
+ relative_translation: If true (the default), interpret translation as a factor of the image size.
+ If false, interpret it as absolute pixels.
+ """
+ def __init__(
+ self,
+ fill_mode = 'nearest',
+ interpolation = 'linear',
+ cval = 0,
+ relative_translation = True,
+ ):
+ self.fill_mode = fill_mode
+ self.cval = cval
+ self.interpolation = interpolation
+ self.relative_translation = relative_translation
+
+ def cvBorderMode(self):
+ if self.fill_mode == 'constant':
+ return cv2.BORDER_CONSTANT
+ if self.fill_mode == 'nearest':
+ return cv2.BORDER_REPLICATE
+ if self.fill_mode == 'reflect':
+ return cv2.BORDER_REFLECT_101
+ if self.fill_mode == 'wrap':
+ return cv2.BORDER_WRAP
+
+ def cvInterpolation(self):
+ if self.interpolation == 'nearest':
+ return cv2.INTER_NEAREST
+ if self.interpolation == 'linear':
+ return cv2.INTER_LINEAR
+ if self.interpolation == 'cubic':
+ return cv2.INTER_CUBIC
+ if self.interpolation == 'area':
+ return cv2.INTER_AREA
+ if self.interpolation == 'lanczos4':
+ return cv2.INTER_LANCZOS4
+
+
+def apply_transform(matrix, image, params):
+ """
+ Apply a transformation to an image.
+
+ The origin of transformation is at the top left corner of the image.
+
+ The matrix is interpreted such that a point (x, y) on the original image is moved to transform * (x, y) in the generated image.
+ Mathematically speaking, that means that the matrix is a transformation from the transformed image space to the original image space.
+
+ Args
+ matrix: A homogeneous 3 by 3 matrix holding representing the transformation to apply.
+ image: The image to transform.
+ params: The transform parameters (see TransformParameters)
+ """
+ output = cv2.warpAffine(
+ image,
+ matrix[:2, :],
+ dsize = (image.shape[1], image.shape[0]),
+ flags = params.cvInterpolation(),
+ borderMode = params.cvBorderMode(),
+ borderValue = params.cval,
+ )
+ return output
+
+
+def compute_resize_scale(image_shape, min_side=800, max_side=1333):
+ """ Compute an image scale such that the image size is constrained to min_side and max_side.
+
+ Args
+ min_side: The image's min side will be equal to min_side after resizing.
+ max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side.
+
+ Returns
+ A resizing scale.
+ """
+ (rows, cols, _) = image_shape
+
+ smallest_side = min(rows, cols)
+
+ # rescale the image so the smallest side is min_side
+ scale = min_side / smallest_side
+
+ # check if the largest side is now greater than max_side, which can happen
+ # when images have a large aspect ratio
+ largest_side = max(rows, cols)
+ if largest_side * scale > max_side:
+ scale = max_side / largest_side
+
+ return scale
+
+
+def resize_image(img, min_side=800, max_side=1333):
+ """ Resize an image such that the size is constrained to min_side and max_side.
+
+ Args
+ min_side: The image's min side will be equal to min_side after resizing.
+ max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side.
+
+ Returns
+ A resized image.
+ """
+ # compute scale to resize the image
+ scale = compute_resize_scale(img.shape, min_side=min_side, max_side=max_side)
+
+ # resize the image with the computed scale
+ img = cv2.resize(img, None, fx=scale, fy=scale)
+
+ return img, scale
+
+
+def _uniform(val_range):
+ """ Uniformly sample from the given range.
+
+ Args
+ val_range: A pair of lower and upper bound.
+ """
+ return np.random.uniform(val_range[0], val_range[1])
+
+
+def _check_range(val_range, min_val=None, max_val=None):
+ """ Check whether the range is a valid range.
+
+ Args
+ val_range: A pair of lower and upper bound.
+ min_val: Minimal value for the lower bound.
+ max_val: Maximal value for the upper bound.
+ """
+ if val_range[0] > val_range[1]:
+ raise ValueError('interval lower bound > upper bound')
+ if min_val is not None and val_range[0] < min_val:
+ raise ValueError('invalid interval lower bound')
+ if max_val is not None and val_range[1] > max_val:
+ raise ValueError('invalid interval upper bound')
+
+
+def _clip(image):
+ """
+ Clip and convert an image to np.uint8.
+
+ Args
+ image: Image to clip.
+ """
+ return np.clip(image, 0, 255).astype(np.uint8)
+
+
+class VisualEffect:
+ """ Struct holding parameters and applying image color transformation.
+
+ Args
+ contrast_factor: A factor for adjusting contrast. Should be between 0 and 3.
+ brightness_delta: Brightness offset between -1 and 1 added to the pixel values.
+ hue_delta: Hue offset between -1 and 1 added to the hue channel.
+ saturation_factor: A factor multiplying the saturation values of each pixel.
+ """
+
+ def __init__(
+ self,
+ contrast_factor,
+ brightness_delta,
+ hue_delta,
+ saturation_factor,
+ ):
+ self.contrast_factor = contrast_factor
+ self.brightness_delta = brightness_delta
+ self.hue_delta = hue_delta
+ self.saturation_factor = saturation_factor
+
+ def __call__(self, image):
+ """ Apply a visual effect on the image.
+
+ Args
+ image: Image to adjust
+ """
+
+ if self.contrast_factor:
+ image = adjust_contrast(image, self.contrast_factor)
+ if self.brightness_delta:
+ image = adjust_brightness(image, self.brightness_delta)
+
+ if self.hue_delta or self.saturation_factor:
+
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+
+ if self.hue_delta:
+ image = adjust_hue(image, self.hue_delta)
+ if self.saturation_factor:
+ image = adjust_saturation(image, self.saturation_factor)
+
+ image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
+
+ return image
+
+
+def random_visual_effect_generator(
+ contrast_range=(0.9, 1.1),
+ brightness_range=(-.1, .1),
+ hue_range=(-0.05, 0.05),
+ saturation_range=(0.95, 1.05)
+):
+ """ Generate visual effect parameters uniformly sampled from the given intervals.
+
+ Args
+ contrast_factor: A factor interval for adjusting contrast. Should be between 0 and 3.
+ brightness_delta: An interval between -1 and 1 for the amount added to the pixels.
+ hue_delta: An interval between -1 and 1 for the amount added to the hue channel.
+ The values are rotated if they exceed 180.
+ saturation_factor: An interval for the factor multiplying the saturation values of each
+ pixel.
+ """
+ _check_range(contrast_range, 0)
+ _check_range(brightness_range, -1, 1)
+ _check_range(hue_range, -1, 1)
+ _check_range(saturation_range, 0)
+
+ def _generate():
+ while True:
+ yield VisualEffect(
+ contrast_factor=_uniform(contrast_range),
+ brightness_delta=_uniform(brightness_range),
+ hue_delta=_uniform(hue_range),
+ saturation_factor=_uniform(saturation_range),
+ )
+
+ return _generate()
+
+
+def adjust_contrast(image, factor):
+ """ Adjust contrast of an image.
+
+ Args
+ image: Image to adjust.
+ factor: A factor for adjusting contrast.
+ """
+ mean = image.mean(axis=0).mean(axis=0)
+ return _clip((image - mean) * factor + mean)
+
+
+def adjust_brightness(image, delta):
+ """ Adjust brightness of an image
+
+ Args
+ image: Image to adjust.
+ delta: Brightness offset between -1 and 1 added to the pixel values.
+ """
+ return _clip(image + delta * 255)
+
+
+def adjust_hue(image, delta):
+ """ Adjust hue of an image.
+
+ Args
+ image: Image to adjust.
+ delta: An interval between -1 and 1 for the amount added to the hue channel.
+ The values are rotated if they exceed 180.
+ """
+ image[..., 0] = np.mod(image[..., 0] + delta * 180, 180)
+ return image
+
+
+def adjust_saturation(image, factor):
+ """ Adjust saturation of an image.
+
+ Args
+ image: Image to adjust.
+ factor: An interval for the factor multiplying the saturation values of each pixel.
+ """
+ image[..., 1] = np.clip(image[..., 1] * factor, 0 , 255)
+ return image
diff --git a/imageai/Detection/keras_retinanet/utils/model.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/model.py
similarity index 100%
rename from imageai/Detection/keras_retinanet/utils/model.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/utils/model.py
diff --git a/imageai_tf_deprecated/Detection/keras_retinanet/utils/tf_version.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/tf_version.py
new file mode 100644
index 00000000..5a9aa90b
--- /dev/null
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/tf_version.py
@@ -0,0 +1,55 @@
+"""
+Copyright 2017-2019 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from __future__ import print_function
+
+import tensorflow as tf
+import sys
+
+MINIMUM_TF_VERSION = 2, 3, 0
+BLACKLISTED_TF_VERSIONS = []
+
+
+def tf_version():
+ """ Get the Tensorflow version.
+ Returns
+ tuple of (major, minor, patch).
+ """
+ return tuple(map(int, tf.version.VERSION.split('-')[0].split('.')))
+
+
+def tf_version_ok(minimum_tf_version=MINIMUM_TF_VERSION, blacklisted=BLACKLISTED_TF_VERSIONS):
+ """ Check if the current Tensorflow version is higher than the minimum version.
+ """
+ return tf_version() >= minimum_tf_version and tf_version() not in blacklisted
+
+
+def assert_tf_version(minimum_tf_version=MINIMUM_TF_VERSION, blacklisted=BLACKLISTED_TF_VERSIONS):
+ """ Assert that the Tensorflow version is up to date.
+ """
+ detected = tf.version.VERSION
+ required = '.'.join(map(str, minimum_tf_version))
+ assert(tf_version_ok(minimum_tf_version, blacklisted)), 'You are using tensorflow version {}. The minimum required version is {} (blacklisted: {}).'.format(detected, required, blacklisted)
+
+
+def check_tf_version():
+ """ Check that the Tensorflow version is up to date. If it isn't, print an error message and exit the script.
+ """
+ try:
+ assert_tf_version()
+ except AssertionError as e:
+ print(e, file=sys.stderr)
+ sys.exit(1)
diff --git a/imageai/Detection/keras_retinanet/utils/transform.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/transform.py
similarity index 89%
rename from imageai/Detection/keras_retinanet/utils/transform.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/utils/transform.py
index 12be7bdc..4c6afe62 100644
--- a/imageai/Detection/keras_retinanet/utils/transform.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/transform.py
@@ -1,3 +1,19 @@
+"""
+Copyright 2017-2018 Fizyr (https://fizyr.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
import numpy as np
DEFAULT_PRNG = np.random
@@ -14,13 +30,13 @@ def transform_aabb(transform, aabb):
The result is a new AABB in the same coordinate system as the original AABB.
The new AABB contains all corner points of the original AABB after applying the given transformation.
- # Arguments
- transform: The transormation to apply.
- x1: The minimum X value of the AABB.
+ Args
+ transform: The transformation to apply.
+ x1: The minimum x value of the AABB.
y1: The minimum y value of the AABB.
- x2: The maximum X value of the AABB.
+ x2: The maximum x value of the AABB.
y2: The maximum y value of the AABB.
- # Returns
+ Returns
The new AABB as tuple (x1, y1, x2, y2)
"""
x1, y1, x2, y2 = aabb
@@ -40,7 +56,7 @@ def transform_aabb(transform, aabb):
def _random_vector(min, max, prng=DEFAULT_PRNG):
""" Construct a random vector between min and max.
- # Arguments
+ Args
min: the minimum value for each component
max: the maximum value for each component
"""
@@ -53,9 +69,9 @@ def _random_vector(min, max, prng=DEFAULT_PRNG):
def rotation(angle):
""" Construct a homogeneous 2D rotation matrix.
- # Arguments
+ Args
angle: the angle in radians
- # Returns
+ Returns
the rotation matrix as 3 by 3 numpy array
"""
return np.array([
@@ -67,11 +83,11 @@ def rotation(angle):
def random_rotation(min, max, prng=DEFAULT_PRNG):
""" Construct a random rotation between -max and max.
- # Arguments
- min: a scalar for the minumum absolute angle in radians
+ Args
+ min: a scalar for the minimum absolute angle in radians
max: a scalar for the maximum absolute angle in radians
prng: the pseudo-random number generator to use.
- # Returns
+ Returns
a homogeneous 3 by 3 rotation matrix
"""
return rotation(prng.uniform(min, max))
@@ -93,11 +109,11 @@ def translation(translation):
def random_translation(min, max, prng=DEFAULT_PRNG):
""" Construct a random 2D translation between min and max.
- # Arguments
- min: a 2D vector with the minumum translation for each dimension
+ Args
+ min: a 2D vector with the minimum translation for each dimension
max: a 2D vector with the maximum translation for each dimension
prng: the pseudo-random number generator to use.
- # Returns
+ Returns
a homogeneous 3 by 3 translation matrix
"""
return translation(_random_vector(min, max, prng))
@@ -105,9 +121,9 @@ def random_translation(min, max, prng=DEFAULT_PRNG):
def shear(angle):
""" Construct a homogeneous 2D shear matrix.
- # Arguments
+ Args
angle: the shear angle in radians
- # Returns
+ Returns
the shear matrix as 3 by 3 numpy array
"""
return np.array([
@@ -119,11 +135,11 @@ def shear(angle):
def random_shear(min, max, prng=DEFAULT_PRNG):
""" Construct a random 2D shear matrix with shear angle between -max and max.
- # Arguments
- min: the minumum shear angle in radians.
+ Args
+ min: the minimum shear angle in radians.
max: the maximum shear angle in radians.
prng: the pseudo-random number generator to use.
- # Returns
+ Returns
a homogeneous 3 by 3 shear matrix
"""
return shear(prng.uniform(min, max))
@@ -131,9 +147,9 @@ def random_shear(min, max, prng=DEFAULT_PRNG):
def scaling(factor):
""" Construct a homogeneous 2D scaling matrix.
- # Arguments
+ Args
factor: a 2D vector for X and Y scaling
- # Returns
+ Returns
the zoom matrix as 3 by 3 numpy array
"""
return np.array([
@@ -145,11 +161,11 @@ def scaling(factor):
def random_scaling(min, max, prng=DEFAULT_PRNG):
""" Construct a random 2D scale matrix between -max and max.
- # Arguments
+ Args
min: a 2D vector containing the minimum scaling factor for X and Y.
min: a 2D vector containing The maximum scaling factor for X and Y.
prng: the pseudo-random number generator to use.
- # Returns
+ Returns
a homogeneous 3 by 3 scaling matrix
"""
return scaling(_random_vector(min, max, prng))
@@ -157,11 +173,11 @@ def random_scaling(min, max, prng=DEFAULT_PRNG):
def random_flip(flip_x_chance, flip_y_chance, prng=DEFAULT_PRNG):
""" Construct a transformation randomly containing X/Y flips (or not).
- # Arguments
+ Args
flip_x_chance: The chance that the result will contain a flip along the X axis.
flip_y_chance: The chance that the result will contain a flip along the Y axis.
prng: The pseudo-random number generator to use.
- # Returns
+ Returns
a homogeneous 3 by 3 transformation matrix
"""
flip_x = prng.uniform(0, 1) < flip_x_chance
@@ -173,10 +189,10 @@ def random_flip(flip_x_chance, flip_y_chance, prng=DEFAULT_PRNG):
def change_transform_origin(transform, center):
""" Create a new transform representing the same transformation,
only with the origin of the linear part changed.
- # Arguments:
+ Args
transform: the transformation matrix
center: the new origin of the transformation
- # Return:
+ Returns
translate(center) * transform * translate(-center)
"""
center = np.array(center)
@@ -211,7 +227,7 @@ def random_transform(
Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret
the translation directly as pixel distances instead.
- # Arguments
+ Args
min_rotation: The minimum rotation in radians for the transform as scalar.
max_rotation: The maximum rotation in radians for the transform as scalar.
min_translation: The minimum translation for the transform as 2D column vector.
@@ -251,7 +267,7 @@ def random_transform_generator(prng=None, **kwargs):
Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret
the translation directly as pixel distances instead.
- # Arguments
+ Args
min_rotation: The minimum rotation in radians for the transform as scalar.
max_rotation: The maximum rotation in radians for the transform as scalar.
min_translation: The minimum translation for the transform as 2D column vector.
diff --git a/imageai/Detection/keras_retinanet/utils/visualization.py b/imageai_tf_deprecated/Detection/keras_retinanet/utils/visualization.py
similarity index 50%
rename from imageai/Detection/keras_retinanet/utils/visualization.py
rename to imageai_tf_deprecated/Detection/keras_retinanet/utils/visualization.py
index 1cdcdfa1..8aba4c93 100644
--- a/imageai/Detection/keras_retinanet/utils/visualization.py
+++ b/imageai_tf_deprecated/Detection/keras_retinanet/utils/visualization.py
@@ -42,8 +42,8 @@ def draw_caption(image, box, caption):
caption : String containing the text to draw.
"""
b = np.array(box).astype(int)
- cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 3)
- cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 2)
+ cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
+ cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
def draw_boxes(image, boxes, color, thickness=2):
@@ -59,38 +59,48 @@ def draw_boxes(image, boxes, color, thickness=2):
draw_box(image, b, color, thickness=thickness)
-def draw_detections(image, detections, color=None, generator=None):
+def draw_detections(image, boxes, scores, labels, color=None, label_to_name=None, score_threshold=0.5):
""" Draws detections in an image.
# Arguments
- image : The image to draw on.
- detections : A [N, 4 + num_classes] matrix (x1, y1, x2, y2, cls_1, cls_2, ...).
- color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used.
- generator : (optional) Generator which can map label to class name.
+ image : The image to draw on.
+ boxes : A [N, 4] matrix (x1, y1, x2, y2).
+ scores : A list of N classification scores.
+ labels : A list of N labels.
+ color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used.
+ label_to_name : (optional) Functor for mapping a label to a name.
+ score_threshold : Threshold used for determining what detections to draw.
"""
- for d in detections:
- label = np.argmax(d[4:])
- c = color if color is not None else label_color(label)
- score = d[4 + label]
- caption = (generator.label_to_name(label) if generator else str(label)) + ': {0:.2f}'.format(score)
- draw_caption(image, d, caption)
+ selection = np.where(scores > score_threshold)[0]
+
+ for i in selection:
+ c = color if color is not None else label_color(labels[i])
+ draw_box(image, boxes[i, :], color=c)
- draw_box(image, d, color=c)
+ # draw labels
+ caption = (label_to_name(labels[i]) if label_to_name else labels[i]) + ': {0:.2f}'.format(scores[i])
+ draw_caption(image, boxes[i, :], caption)
-def draw_annotations(image, annotations, color=(0, 255, 0), generator=None):
+def draw_annotations(image, annotations, color=(0, 255, 0), label_to_name=None):
""" Draws annotations in an image.
# Arguments
- image : The image to draw on.
- annotations : A [N, 5] matrix (x1, y1, x2, y2, label).
- color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used.
- generator : (optional) Generator which can map label to class name.
+ image : The image to draw on.
+ annotations : A [N, 5] matrix (x1, y1, x2, y2, label) or dictionary containing bboxes (shaped [N, 4]) and labels (shaped [N]).
+ color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used.
+ label_to_name : (optional) Functor for mapping a label to a name.
"""
- for a in annotations:
- label = a[4]
- c = color if color is not None else label_color(label)
- caption = '{}'.format(generator.label_to_name(label) if generator else label)
- draw_caption(image, a, caption)
+ if isinstance(annotations, np.ndarray):
+ annotations = {'bboxes': annotations[:, :4], 'labels': annotations[:, 4]}
- draw_box(image, a, color=c)
+ assert('bboxes' in annotations)
+ assert('labels' in annotations)
+ assert(annotations['bboxes'].shape[0] == annotations['labels'].shape[0])
+
+ for i in range(annotations['bboxes'].shape[0]):
+ label = annotations['labels'][i]
+ c = color if color is not None else label_color(label)
+ caption = '{}'.format(label_to_name(label) if label_to_name else label)
+ draw_caption(image, annotations['bboxes'][i], caption)
+ draw_box(image, annotations['bboxes'][i], color=c)
diff --git a/imageai_tf_deprecated/Prediction/Custom/__init__.py b/imageai_tf_deprecated/Prediction/Custom/__init__.py
new file mode 100644
index 00000000..49f6ef76
--- /dev/null
+++ b/imageai_tf_deprecated/Prediction/Custom/__init__.py
@@ -0,0 +1,22 @@
+from ...Classification.Custom import ClassificationModelTrainer, CustomImageClassification
+
+
+
+
+
+class ModelTraining(ClassificationModelTrainer):
+ """
+ Deprecated!
+ Replaced with 'imageai.Classification.Custom.ClassificationModelTrainer'
+ """
+ def __call__(self):
+ None
+
+class CustomImagePrediction(CustomImageClassification):
+ """
+ Deprecated!
+ Replaced with 'imageai.Classification.Custom.CustomImageClassification'
+ """
+
+ def __call__(self):
+ None
\ No newline at end of file
diff --git a/imageai/Prediction/Custom/custom_utils.py b/imageai_tf_deprecated/Prediction/Custom/custom_utils.py
similarity index 100%
rename from imageai/Prediction/Custom/custom_utils.py
rename to imageai_tf_deprecated/Prediction/Custom/custom_utils.py
diff --git a/imageai_tf_deprecated/Prediction/__init__.py b/imageai_tf_deprecated/Prediction/__init__.py
new file mode 100644
index 00000000..0a8ca882
--- /dev/null
+++ b/imageai_tf_deprecated/Prediction/__init__.py
@@ -0,0 +1,12 @@
+from ..Classification import ImageClassification
+from matplotlib.cbook import deprecated
+
+
+class ImagePrediction(ImageClassification):
+ """
+ Deprecated!
+ Replaced with 'imageai.Classification.ImageClassification'
+ """
+
+ def __call__(self):
+ None
\ No newline at end of file
diff --git a/imageai/Prediction/imagenet_utils.py b/imageai_tf_deprecated/Prediction/imagenet_utils.py
similarity index 100%
rename from imageai/Prediction/imagenet_utils.py
rename to imageai_tf_deprecated/Prediction/imagenet_utils.py
diff --git a/imageai_tf_deprecated/__init__.py b/imageai_tf_deprecated/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/images/Thumbs.db b/images/Thumbs.db
deleted file mode 100644
index ea712392..00000000
Binary files a/images/Thumbs.db and /dev/null differ
diff --git a/images/image3new.jpg-objects/Thumbs.db b/images/image3new.jpg-objects/Thumbs.db
deleted file mode 100644
index 5a6736f5..00000000
Binary files a/images/image3new.jpg-objects/Thumbs.db and /dev/null differ
diff --git a/jarvis.png b/jarvis.png
new file mode 100644
index 00000000..2d0227f8
Binary files /dev/null and b/jarvis.png differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..f44ebeb5
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+cython
+pillow>=7.0.0
+numpy>=1.18.1
+opencv-python>=4.1.2
+torch>=1.9.0 --extra-index-url https://download.pytorch.org/whl/cpu
+torchvision>=0.10.0 --extra-index-url https://download.pytorch.org/whl/cpu
+pytest==7.1.3
+tqdm==4.64.1
+scipy>=1.7.3
+matplotlib>=3.4.3
+mock==4.0.3
\ No newline at end of file
diff --git a/requirements_extra.txt b/requirements_extra.txt
new file mode 100644
index 00000000..6141e585
--- /dev/null
+++ b/requirements_extra.txt
@@ -0,0 +1 @@
+pycocotools@git+https://github.com/gautamchitnis/cocoapi.git@cocodataset-master#subdirectory=PythonAPI
\ No newline at end of file
diff --git a/requirements_gpu.txt b/requirements_gpu.txt
new file mode 100644
index 00000000..9bdbf06b
--- /dev/null
+++ b/requirements_gpu.txt
@@ -0,0 +1,11 @@
+cython
+pillow>=7.0.0
+numpy>=1.18.1
+opencv-python>=4.1.2
+torch>=1.9.0 --extra-index-url https://download.pytorch.org/whl/cu102
+torchvision>=0.10.0 --extra-index-url https://download.pytorch.org/whl/cu102
+pytest==7.1.3
+tqdm==4.64.1
+scipy>=1.7.3
+matplotlib>=3.4.3
+mock==4.0.3
\ No newline at end of file
diff --git a/scripts/pascal_voc_to_yolo.py b/scripts/pascal_voc_to_yolo.py
new file mode 100644
index 00000000..5d83f74f
--- /dev/null
+++ b/scripts/pascal_voc_to_yolo.py
@@ -0,0 +1,157 @@
+import glob
+import os
+import argparse
+import pickle
+import xml.etree.ElementTree as ET
+from os import listdir, getcwd
+from os.path import join
+import shutil
+
+
+dirs = ['train', 'validation']
+sub_dirs = ["images", "annotations"]
+classes = []
+
+def convert(size, box):
+ dw = 1./(size[0])
+ dh = 1./(size[1])
+ x = (box[0] + box[1])/2.0 - 1
+ y = (box[2] + box[3])/2.0 - 1
+ w = box[1] - box[0]
+ h = box[3] - box[2]
+ x = x*dw
+ w = w*dw
+ y = y*dh
+ h = h*dh
+ return (x,y,w,h)
+
+def convert_annotation(input_ann_path):
+
+ tree = ET.parse(input_ann_path)
+ root = tree.getroot()
+ size = root.find('size')
+ w = int(size.find('width').text)
+ h = int(size.find('height').text)
+
+ ann_list = []
+
+ for obj in root.iter('object'):
+ obj_class = obj.find('name').text
+ if obj_class not in classes:
+ classes.append(obj_class)
+ xmlbox = obj.find('bndbox')
+ b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
+ bb = convert((w,h), b)
+
+ ann_list.append(
+ {
+ "class": obj_class,
+ "bbox": bb
+ }
+ )
+
+ return ann_list
+
+
+def main(dataset_dir: str):
+ yolo_dataset = os.path.join(
+ os.path.dirname(dataset_dir),
+ os.path.basename(f"{dataset_dir}-yolo")
+ )
+ for dir in dirs:
+ dir_path = os.path.join(
+ yolo_dataset,
+ dir
+ )
+ os.makedirs(dir_path, exist_ok=True)
+
+ for sub_dir in sub_dirs:
+ os.makedirs(
+ os.path.join(
+ dir_path,
+ sub_dir
+ ),
+ exist_ok=True
+ )
+
+ train_anns = {}
+ validation_anns = {}
+
+ for dir in dirs:
+ dir_path = os.path.join(
+ dataset_dir,
+ dir
+ )
+
+ images = [file for file in os.listdir(
+ os.path.join(dir_path, "images")
+ ) if file.endswith(".png") or file.endswith(".jpg") or file.endswith(".jpeg")]
+
+ annotations = [file for file in os.listdir(
+ os.path.join(dir_path, "annotations")
+ ) if file.endswith(".xml")]
+
+ for image, annotation in zip(images, annotations):
+ shutil.copy(
+ os.path.join(
+ dataset_dir,
+ dir,
+ "images",
+ image
+ ),
+ os.path.join(
+ yolo_dataset,
+ dir,
+ "images",
+ image
+ )
+ )
+
+ ann_list = convert_annotation(
+ os.path.join(
+ dataset_dir,
+ dir,
+ "annotations",
+ annotation
+ )
+ )
+ if dir == "train":
+ train_anns[annotation] = ann_list
+ elif dir == "validation":
+ validation_anns[annotation] = ann_list
+
+ all_classes = sorted(classes)
+
+ for k,v in {"train": train_anns, "validation": validation_anns}.items():
+ for anns_k, anns_v in v.items():
+ output_ann_path = os.path.join(
+ yolo_dataset, k, "annotations", anns_k.replace(".xml", ".txt")
+ )
+ anns_str = ""
+ for ann in anns_v:
+ class_idx = all_classes.index(ann["class"])
+ bbox = [str(f) for f in ann["bbox"]]
+ anns_str += f"{class_idx} {' '.join(bbox)}\n"
+
+ with open(output_ann_path, "w") as ann_writer:
+ ann_writer.write(anns_str)
+
+ with open(os.path.join(
+ yolo_dataset, k, "annotations", "classes.txt"
+ ), "w") as classes_writer:
+ classes_writer.write("\n".join(all_classes))
+
+
+
+if __name__ == "__main__":
+
+ parse = argparse.ArgumentParser(
+ description="Convert Pascal VOC dataset to YOLO format")
+ parse.add_argument(
+ "--dataset_dir",
+ help="Dataset directory",
+ type=str,
+ required=True,
+ )
+ args = parse.parse_args()
+ main(args.dataset_dir)
diff --git a/setup.py b/setup.py
index 9c5a52d5..e2f226d6 100644
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,13 @@
from setuptools import setup,find_packages
setup(name="imageai",
- version='2.0.1',
- description='A flexible Computer Vision and Deep Learning library for applications and systems.',
- url="https://moses.specpal.science",
- author='Moses Olafenwa and John Olafenwa',
+ version='3.0.3',
+ description='A python library built to empower developers to build applications and systems with self-contained Computer Vision capabilities',
+ url="https://github.com/OlafenwaMoses/ImageAI",
+ author='Moses Olafenwa',
+ author_email='guymodscientist@gmail.com',
license='MIT',
- packages= find_packages(),
- zip_safe=False
-
- )
\ No newline at end of file
+ packages= find_packages(exclude=["*imageai_tf_deprecated*"]),
+ install_requires=[],
+ include_package_data=True,
+ zip_safe=False)
\ No newline at end of file
diff --git a/test-images/9.jpg b/test-images/9.jpg
index 16b26c30..7bacab72 100644
Binary files a/test-images/9.jpg and b/test-images/9.jpg differ
diff --git a/test/test_custom_classification.py b/test/test_custom_classification.py
new file mode 100644
index 00000000..974c99ed
--- /dev/null
+++ b/test/test_custom_classification.py
@@ -0,0 +1,100 @@
+import os, sys
+import cv2
+from PIL import Image
+import pytest
+from os.path import dirname
+sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
+from imageai.Classification.Custom import CustomImageClassification
+
+test_folder = dirname(os.path.abspath(__file__))
+
+
+
+@pytest.mark.parametrize(
+ "image_input",
+ [
+ (os.path.join(test_folder, "data-images", "1.jpg")),
+ (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg"))),
+ (Image.open(os.path.join(test_folder, "data-images", "1.jpg"))),
+ ]
+)
+def test_recognition_model_mobilenetv2(image_input):
+
+ classifier = CustomImageClassification()
+ classifier.setModelTypeAsMobileNetV2()
+ classifier.setModelPath(os.path.join(test_folder, "data-models", "mobilenet_v2-idenprof-test_acc_0.85300_epoch-92.pt"))
+ classifier.setJsonPath(os.path.join(test_folder, "data-json", "idenprof_model_classes.json"))
+ classifier.loadModel()
+ predictions, probabilities = classifier.classifyImage(image_input=image_input, result_count=5)
+
+ assert isinstance(predictions, list)
+ assert isinstance(probabilities, list)
+ assert isinstance(predictions[0], str)
+ assert isinstance(probabilities[0], float)
+
+
+@pytest.mark.parametrize(
+ "image_input",
+ [
+ (os.path.join(test_folder, "data-images", "1.jpg")),
+ (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg"))),
+ (Image.open(os.path.join(test_folder, "data-images", "1.jpg"))),
+ ]
+)
+def test_recognition_model_resnet(image_input):
+
+ classifier = CustomImageClassification()
+ classifier.setModelTypeAsResNet50()
+ classifier.setModelPath(os.path.join(test_folder, "data-models", "resnet50-idenprof-test_acc_0.78200_epoch-91.pt"))
+ classifier.setJsonPath(os.path.join(test_folder, "data-json", "idenprof_model_classes.json"))
+ classifier.loadModel()
+ predictions, probabilities = classifier.classifyImage(image_input=image_input, result_count=5)
+
+ assert isinstance(predictions, list)
+ assert isinstance(probabilities, list)
+ assert isinstance(predictions[0], str)
+ assert isinstance(probabilities[0], float)
+
+@pytest.mark.parametrize(
+ "image_input",
+ [
+ (os.path.join(test_folder, "data-images", "1.jpg")),
+ (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg"))),
+ (Image.open(os.path.join(test_folder, "data-images", "1.jpg"))),
+ ]
+)
+def test_recognition_model_inceptionv3(image_input):
+
+ classifier = CustomImageClassification()
+ classifier.setModelTypeAsInceptionV3()
+ classifier.setModelPath(os.path.join(test_folder, "data-models", "inception_v3-idenprof-test_acc_0.81050_epoch-92.pt"))
+ classifier.setJsonPath(os.path.join(test_folder, "data-json", "idenprof_model_classes.json"))
+ classifier.loadModel()
+ predictions, probabilities = classifier.classifyImage(image_input=image_input, result_count=5)
+
+ assert isinstance(predictions, list)
+ assert isinstance(probabilities, list)
+ assert isinstance(predictions[0], str)
+ assert isinstance(probabilities[0], float)
+
+@pytest.mark.parametrize(
+ "image_input",
+ [
+ (os.path.join(test_folder, "data-images", "1.jpg")),
+ (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg"))),
+ (Image.open(os.path.join(test_folder, "data-images", "1.jpg"))),
+ ]
+)
+def test_recognition_model_densenet(image_input):
+
+ classifier = CustomImageClassification()
+ classifier.setModelTypeAsDenseNet121()
+ classifier.setModelPath(os.path.join(test_folder, "data-models", "densenet121-idenprof-test_acc_0.82550_epoch-95.pt"))
+ classifier.setJsonPath(os.path.join(test_folder, "data-json", "idenprof_model_classes.json"))
+ classifier.loadModel()
+ predictions, probabilities = classifier.classifyImage(image_input=image_input, result_count=5)
+
+ assert isinstance(predictions, list)
+ assert isinstance(probabilities, list)
+ assert isinstance(predictions[0], str)
+ assert isinstance(probabilities[0], float)
\ No newline at end of file
diff --git a/test/test_custom_classification_training.py b/test/test_custom_classification_training.py
new file mode 100644
index 00000000..1b25ecd7
--- /dev/null
+++ b/test/test_custom_classification_training.py
@@ -0,0 +1,192 @@
+import os, sys
+import cv2
+import shutil
+from PIL import Image
+import pytest
+from os.path import dirname
+sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
+from imageai.Classification.Custom import ClassificationModelTrainer, CustomImageClassification
+
+test_folder = dirname(os.path.abspath(__file__))
+
+
+classification_dataset = os.path.join(
+ test_folder,
+ "data-datasets",
+ "idenprof"
+)
+
+pretrained_models_folder = os.path.join(
+ test_folder,
+ "data-models"
+)
+
+
+@pytest.mark.parametrize(
+ "transfer_learning",
+ [
+ (os.path.join(
+ pretrained_models_folder,
+ "resnet50-19c8e357.pth"
+ )),
+ (None),
+ ]
+)
+def test_resnet50_training(transfer_learning):
+
+ models_dir = os.path.join(
+ classification_dataset,
+ "models"
+ )
+ if os.path.isdir(
+ models_dir
+ ):
+ shutil.rmtree(models_dir)
+
+ trainer = ClassificationModelTrainer()
+ trainer.setModelTypeAsResNet50()
+ trainer.setDataDirectory(data_directory=classification_dataset)
+ trainer.trainModel(
+ num_experiments=1,
+ batch_size=2,
+ transfer_from_model=transfer_learning)
+
+ assert os.path.isdir(models_dir) == True
+ assert os.path.isfile(
+ os.path.join(
+ models_dir, "idenprof_model_classes.json"
+ )
+ ) == True
+
+ model_found = False
+ for file in os.listdir(models_dir):
+ if file.endswith(".pt"):
+ model_found = True
+ assert model_found == True
+
+
+@pytest.mark.parametrize(
+ "transfer_learning",
+ [
+ (os.path.join(
+ pretrained_models_folder,
+ "densenet121-a639ec97.pth"
+ )),
+ (None),
+ ]
+)
+def test_densenet121_training(transfer_learning):
+
+ models_dir = os.path.join(
+ classification_dataset,
+ "models"
+ )
+ if os.path.isdir(
+ models_dir
+ ):
+ shutil.rmtree(models_dir)
+
+ trainer = ClassificationModelTrainer()
+ trainer.setModelTypeAsDenseNet121()
+ trainer.setDataDirectory(data_directory=classification_dataset)
+ trainer.trainModel(
+ num_experiments=1,
+ batch_size=2,
+ transfer_from_model=transfer_learning)
+
+ assert os.path.isdir(models_dir) == True
+ assert os.path.isfile(
+ os.path.join(
+ models_dir, "idenprof_model_classes.json"
+ )
+ ) == True
+ model_found = False
+ for file in os.listdir(models_dir):
+ if file.endswith(".pt"):
+ model_found = True
+ assert model_found == True
+
+
+
+@pytest.mark.parametrize(
+ "transfer_learning",
+ [
+ (os.path.join(
+ pretrained_models_folder,
+ "inception_v3_google-1a9a5a14.pth"
+ )),
+ (None),
+ ]
+)
+def test_inceptionv3_training(transfer_learning):
+
+ models_dir = os.path.join(
+ classification_dataset,
+ "models"
+ )
+ if os.path.isdir(
+ models_dir
+ ):
+ shutil.rmtree(models_dir)
+
+ trainer = ClassificationModelTrainer()
+ trainer.setModelTypeAsInceptionV3()
+ trainer.setDataDirectory(data_directory=classification_dataset)
+ trainer.trainModel(
+ num_experiments=1,
+ batch_size=2,
+ transfer_from_model=transfer_learning)
+
+ assert os.path.isdir(models_dir) == True
+ assert os.path.isfile(
+ os.path.join(
+ models_dir, "idenprof_model_classes.json"
+ )
+ ) == True
+ model_found = False
+ for file in os.listdir(models_dir):
+ if file.endswith(".pt"):
+ model_found = True
+ assert model_found == True
+
+
+@pytest.mark.parametrize(
+ "transfer_learning",
+ [
+ (os.path.join(
+ pretrained_models_folder,
+ "mobilenet_v2-b0353104.pth"
+ )),
+ (None),
+ ]
+)
+def test_mobilenetv2_training(transfer_learning):
+
+ models_dir = os.path.join(
+ classification_dataset,
+ "models"
+ )
+ if os.path.isdir(
+ models_dir
+ ):
+ shutil.rmtree(models_dir)
+
+ trainer = ClassificationModelTrainer()
+ trainer.setModelTypeAsMobileNetV2()
+ trainer.setDataDirectory(data_directory=classification_dataset)
+ trainer.trainModel(
+ num_experiments=1,
+ batch_size=2,
+ transfer_from_model=transfer_learning)
+
+ assert os.path.isdir(models_dir) == True
+ assert os.path.isfile(
+ os.path.join(
+ models_dir, "idenprof_model_classes.json"
+ )
+ ) == True
+ model_found = False
+ for file in os.listdir(models_dir):
+ if file.endswith(".pt"):
+ model_found = True
+ assert model_found == True
diff --git a/test/test_custom_detection_training.py b/test/test_custom_detection_training.py
new file mode 100644
index 00000000..bbd43972
--- /dev/null
+++ b/test/test_custom_detection_training.py
@@ -0,0 +1,83 @@
+import os, sys
+import shutil
+import pytest
+from os.path import dirname
+sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
+from imageai.Detection.Custom import DetectionModelTrainer
+
+test_folder = dirname(os.path.abspath(__file__))
+
+
+detection_dataset = os.path.join(
+ test_folder,
+ "data-datasets",
+ "number-plate"
+)
+
+pretrained_models_folder = os.path.join(
+ test_folder,
+ "data-models"
+)
+
+def delete_cache(dirs: list):
+ for dir in dirs:
+ if os.path.isdir(dir):
+ shutil.rmtree(dir)
+
+@pytest.mark.parametrize(
+ "transfer_learning",
+ [
+ (os.path.join(
+ pretrained_models_folder,
+ "yolov3.pt"
+ )),
+ (None),
+ ]
+)
+def test_yolov3_training(transfer_learning):
+ json_dir = os.path.join(detection_dataset, "json")
+ json_file = os.path.join(json_dir, "number-plate_yolov3_detection_config.json")
+ models_dir = os.path.join(detection_dataset, "models")
+
+ delete_cache([json_dir, models_dir])
+
+ trainer = DetectionModelTrainer()
+ trainer.setModelTypeAsYOLOv3()
+ trainer.setDataDirectory(data_directory=detection_dataset)
+ trainer.setTrainConfig(object_names_array=["number-plate"], batch_size=2, num_experiments=2, train_from_pretrained_model=transfer_learning)
+ trainer.trainModel()
+
+
+ assert os.path.isfile(json_file)
+ assert len([file for file in os.listdir(models_dir) if file.endswith(".pt")]) > 0
+
+ delete_cache([json_dir, models_dir])
+
+@pytest.mark.parametrize(
+ "transfer_learning",
+ [
+ (os.path.join(
+ pretrained_models_folder,
+ "tiny-yolov3.pt"
+ )),
+ (None),
+ ]
+)
+def test_tiny_yolov3_training(transfer_learning):
+ json_dir = os.path.join(detection_dataset, "json")
+ json_file = os.path.join(json_dir, "number-plate_tiny-yolov3_detection_config.json")
+ models_dir = os.path.join(detection_dataset, "models")
+
+ delete_cache([json_dir, models_dir])
+
+ trainer = DetectionModelTrainer()
+ trainer.setModelTypeAsTinyYOLOv3()
+ trainer.setDataDirectory(data_directory=detection_dataset)
+ trainer.setTrainConfig(object_names_array=["number-plate"], batch_size=2, num_experiments=2, train_from_pretrained_model=transfer_learning)
+ trainer.trainModel()
+
+
+ assert os.path.isfile(json_file)
+ assert len([file for file in os.listdir(models_dir) if file.endswith(".pt")]) > 0
+
+ delete_cache([json_dir, models_dir])
\ No newline at end of file
diff --git a/test/test_custom_object_detection.py b/test/test_custom_object_detection.py
new file mode 100644
index 00000000..598b0fe0
--- /dev/null
+++ b/test/test_custom_object_detection.py
@@ -0,0 +1,159 @@
+import os, sys
+from typing import List
+import shutil
+import cv2
+import uuid
+from PIL import Image
+import numpy as np
+import pytest
+from os.path import dirname
+sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
+from imageai.Detection.Custom import CustomObjectDetection
+
+test_folder = dirname(os.path.abspath(__file__))
+
+
+
+def delete_cache(paths: List[str]):
+ for path in paths:
+ if os.path.isfile(path):
+ os.remove(path)
+ elif os.path.isdir(path):
+ shutil.rmtree(path)
+
+
+@pytest.mark.parametrize(
+ "input_image, output_type, extract_objects",
+ [
+ (os.path.join(test_folder, test_folder, "data-images", "15.jpg"), "file", False),
+ (os.path.join(test_folder, test_folder, "data-images", "15.jpg"), "file", True),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "15.jpg")), "array", False),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "15.jpg")), "array", True),
+ (Image.open(os.path.join(test_folder, test_folder, "data-images", "15.jpg")), "array", True),
+ ]
+)
+def test_object_detection_yolov3(input_image, output_type, extract_objects):
+ detector = CustomObjectDetection()
+ detector.setModelTypeAsYOLOv3()
+ detector.setModelPath(os.path.join(test_folder, "data-models", "yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt"))
+ detector.setJsonPath(os.path.join(test_folder, "data-json", "number-plate-dataset-imageai_yolov3_detection_config.json"))
+ detector.loadModel()
+
+ output_img_path = os.path.join(test_folder, "data-images", str(uuid.uuid4()) + ".jpg")
+
+ if output_type == "array":
+ if extract_objects:
+ output_image_array, detections, extracted_objects = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type, extract_detected_objects=extract_objects)
+
+ assert len(detections) > 0
+ assert len(extracted_objects) > 0
+ for extracted_obj in extracted_objects:
+ assert type(extracted_obj) == np.ndarray
+ else:
+ output_image_array, detections = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type)
+ assert type(output_image_array) == np.ndarray
+ assert len(detections) > 0
+ else:
+ if extract_objects:
+ detections, extracted_object_paths = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path, extract_detected_objects=True)
+
+ assert len(detections) > 0
+ assert os.path.isfile(output_img_path)
+ assert len(extracted_object_paths) > 0
+ delete_cache(
+ extracted_object_paths
+ )
+ delete_cache(
+ [extracted_object_paths[0], output_img_path]
+ )
+ else:
+ detections = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path)
+ assert len(detections) > 0
+ delete_cache([output_img_path])
+
+ assert type(detections) == list
+
+
+ for eachObject in detections:
+ assert type(eachObject) == dict
+ assert "name" in eachObject.keys()
+ assert type(eachObject["name"]) == str
+ assert "percentage_probability" in eachObject.keys()
+ assert type(eachObject["percentage_probability"]) == float
+ assert "box_points" in eachObject.keys()
+ assert type(eachObject["box_points"]) == list
+ box_points = eachObject["box_points"]
+ for point in box_points:
+ assert type(point) == int
+ assert box_points[0] < box_points[2]
+ assert box_points[1] < box_points[3]
+
+
+
+@pytest.mark.parametrize(
+ "input_image, output_type, extract_objects",
+ [
+ (os.path.join(test_folder, test_folder, "data-images", "15.jpg"), "file", False),
+ (os.path.join(test_folder, test_folder, "data-images", "15.jpg"), "file", True),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "15.jpg")), "array", False),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "15.jpg")), "array", True),
+ (Image.open(os.path.join(test_folder, test_folder, "data-images", "15.jpg")), "array", True),
+ ]
+)
+def test_object_detection_tiny_yolov3(input_image, output_type, extract_objects):
+ detector = CustomObjectDetection()
+ detector.setModelTypeAsTinyYOLOv3()
+ detector.setModelPath(os.path.join(test_folder, "data-models", "tiny_yolov3_number-plate-dataset-imageai_mAP-0.22595_epoch-20.pt"))
+ detector.setJsonPath(os.path.join(test_folder, "data-json", "number-plate-dataset-imageai_tiny_yolov3_detection_config.json"))
+ detector.loadModel()
+
+ output_img_path = os.path.join(test_folder, "data-images", str(uuid.uuid4()) + ".jpg")
+
+ if output_type == "array":
+ if extract_objects:
+ output_image_array, detections, extracted_objects = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type, extract_detected_objects=extract_objects)
+
+ assert len(detections) > 0
+ assert len(extracted_objects) == len(detections)
+ for extracted_obj in extracted_objects:
+ assert type(extracted_obj) == np.ndarray
+ else:
+ output_image_array, detections = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type)
+ assert type(output_image_array) == np.ndarray
+ assert len(detections) > 0
+ else:
+ if extract_objects:
+ detections, extracted_object_paths = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path, extract_detected_objects=True)
+
+ assert len(detections) > 0
+ assert os.path.isfile(output_img_path)
+ assert len(extracted_object_paths) == len(detections)
+ delete_cache(
+ extracted_object_paths
+ )
+ delete_cache(
+ [extracted_object_paths[0], output_img_path]
+ )
+ else:
+ detections = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path)
+ assert len(detections) > 0
+ delete_cache([output_img_path])
+
+ assert type(detections) == list
+
+
+ for eachObject in detections:
+ assert type(eachObject) == dict
+ assert "name" in eachObject.keys()
+ assert type(eachObject["name"]) == str
+ assert "percentage_probability" in eachObject.keys()
+ assert type(eachObject["percentage_probability"]) == float
+ assert "box_points" in eachObject.keys()
+ assert type(eachObject["box_points"]) == list
+ box_points = eachObject["box_points"]
+ for point in box_points:
+ assert type(point) == int
+ assert box_points[0] < box_points[2]
+ assert box_points[1] < box_points[3]
+
+
diff --git a/test/test_custom_video_detection.py b/test/test_custom_video_detection.py
new file mode 100644
index 00000000..844657a8
--- /dev/null
+++ b/test/test_custom_video_detection.py
@@ -0,0 +1,119 @@
+import os, sys
+from typing import List
+from numpy import ndarray
+from os.path import dirname
+from mock import patch
+sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
+
+from imageai.Detection.Custom import CustomVideoObjectDetection
+
+
+test_folder = dirname(os.path.abspath(__file__))
+
+video_file = os.path.join(test_folder, "data-videos", "dashcam.mp4")
+video_file_output = os.path.join(test_folder, "data-videos", "dashcam-detected")
+
+
+
+class CallbackFunctions:
+ def forFrame(frame_number, output_array, output_count, detected_frame):
+ assert isinstance(detected_frame, ndarray)
+ assert isinstance(frame_number, int)
+ assert isinstance(output_array, list)
+ assert isinstance(output_array[0], dict)
+ assert isinstance(output_array[0]["name"], str)
+ assert isinstance(output_array[0]["percentage_probability"], float)
+ assert isinstance(output_array[0]["box_points"], list)
+
+ assert isinstance(output_count, dict)
+ for a_key in dict(output_count).keys():
+ assert isinstance(a_key, str)
+ assert isinstance(output_count[a_key], int)
+
+ def forSecond(second_number, output_arrays, count_arrays, average_output_count, detected_frame):
+ assert isinstance(detected_frame, ndarray)
+ assert isinstance(second_number, int)
+ assert isinstance(output_arrays, list)
+ assert isinstance(output_arrays[0], list)
+
+ assert isinstance(output_arrays[0][0], dict)
+ assert isinstance(output_arrays[0][0]["name"], str)
+ assert isinstance(output_arrays[0][0]["percentage_probability"], float)
+ assert isinstance(output_arrays[0][0]["box_points"], list)
+
+ assert isinstance(count_arrays, list)
+ assert isinstance(count_arrays[0], dict)
+ for a_key in dict(count_arrays[0]).keys():
+ assert isinstance(a_key, str)
+ assert isinstance(count_arrays[0][a_key], int)
+
+ assert isinstance(average_output_count, dict)
+ for a_key2 in dict(average_output_count).keys():
+ assert isinstance(a_key2, str)
+ assert isinstance(average_output_count[a_key2], int)
+
+
+
+def delete_cache(files: List[str]):
+ for file in files:
+ if os.path.isfile(file):
+ os.remove(file)
+
+
+
+
+def test_video_detection_yolov3():
+ delete_cache([video_file_output + ".mp4"])
+
+ detector = CustomVideoObjectDetection()
+ detector.setModelTypeAsYOLOv3()
+ detector.setModelPath(model_path=os.path.join(test_folder, "data-models", "yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt"))
+ detector.setJsonPath(os.path.join(test_folder, "data-json", "number-plate-dataset-imageai_yolov3_detection_config.json"))
+ detector.loadModel()
+ video_path = detector.detectObjectsFromVideo(input_file_path=video_file, output_file_path=video_file_output, save_detected_video=True, frames_per_second=30, log_progress=True)
+
+ assert os.path.exists(video_file_output + ".mp4")
+ assert isinstance(video_path, str)
+
+ delete_cache([video_file_output + ".mp4"])
+
+
+def test_video_detection_tiny_yolov3():
+ delete_cache([video_file_output + ".mp4"])
+
+ detector = CustomVideoObjectDetection()
+ detector.setModelTypeAsTinyYOLOv3()
+ detector.setModelPath(model_path=os.path.join(test_folder, "data-models", "tiny_yolov3_number-plate-dataset-imageai_mAP-0.22595_epoch-20.pt"))
+ detector.setJsonPath(os.path.join(test_folder, "data-json", "number-plate-dataset-imageai_tiny_yolov3_detection_config.json"))
+ detector.loadModel()
+ video_path = detector.detectObjectsFromVideo(input_file_path=video_file, output_file_path=video_file_output, save_detected_video=True, frames_per_second=30, log_progress=True)
+
+ assert os.path.exists(video_file_output + ".mp4")
+ assert isinstance(video_path, str)
+
+ delete_cache([video_file_output + ".mp4"])
+
+
+def test_video_detection_yolo_analysis():
+ delete_cache([video_file_output + ".mp4"])
+
+ detector = CustomVideoObjectDetection()
+ detector.setModelTypeAsYOLOv3()
+ detector.setModelPath(model_path=os.path.join(test_folder, "data-models", "yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt"))
+ detector.setJsonPath(os.path.join(test_folder, "data-json", "number-plate-dataset-imageai_yolov3_detection_config.json"))
+ detector.loadModel()
+
+ with patch.object(CallbackFunctions, 'forFrame') as frameFunc:
+ with patch.object(CallbackFunctions, 'forSecond') as secondFunc:
+
+ video_path = detector.detectObjectsFromVideo(input_file_path=video_file, output_file_path=video_file_output, save_detected_video=True, frames_per_second=30, log_progress=True, per_frame_function=frameFunc, per_second_function=secondFunc, return_detected_frame=True)
+
+ assert os.path.exists(video_file_output + ".mp4")
+ assert isinstance(video_path, str)
+
+ frameFunc.assert_called()
+ secondFunc.assert_called()
+
+ delete_cache([video_file_output + ".mp4"])
+
+
diff --git a/test/test_image_classification.py b/test/test_image_classification.py
new file mode 100644
index 00000000..61563390
--- /dev/null
+++ b/test/test_image_classification.py
@@ -0,0 +1,96 @@
+import os, sys
+import cv2
+from PIL import Image
+import pytest
+from os.path import dirname
+sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
+from imageai.Classification import ImageClassification
+
+test_folder = dirname(os.path.abspath(__file__))
+
+
+
+@pytest.mark.parametrize(
+ "image_input",
+ [
+ (os.path.join(test_folder, "data-images", "1.jpg")),
+ (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg"))),
+ (Image.open(os.path.join(test_folder, "data-images", "1.jpg"))),
+ ]
+)
+def test_recognition_model_mobilenetv2(image_input):
+
+ classifier = ImageClassification()
+ classifier.setModelTypeAsMobileNetV2()
+ classifier.setModelPath(os.path.join(test_folder, "data-models", "mobilenet_v2-b0353104.pth"))
+ classifier.loadModel()
+ predictions, probabilities = classifier.classifyImage(image_input=image_input)
+
+ assert isinstance(predictions, list)
+ assert isinstance(probabilities, list)
+ assert isinstance(predictions[0], str)
+ assert isinstance(probabilities[0], float)
+
+
+@pytest.mark.parametrize(
+ "image_input",
+ [
+ (os.path.join(test_folder, "data-images", "1.jpg")),
+ (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg"))),
+ (Image.open(os.path.join(test_folder, "data-images", "1.jpg"))),
+ ]
+)
+def test_recognition_model_resnet(image_input):
+
+ classifier = ImageClassification()
+ classifier.setModelTypeAsResNet50()
+ classifier.setModelPath(os.path.join(test_folder, "data-models", "resnet50-19c8e357.pth"))
+ classifier.loadModel()
+ predictions, probabilities = classifier.classifyImage(image_input=image_input)
+
+ assert isinstance(predictions, list)
+ assert isinstance(probabilities, list)
+ assert isinstance(predictions[0], str)
+ assert isinstance(probabilities[0], float)
+
+@pytest.mark.parametrize(
+ "image_input",
+ [
+ (os.path.join(test_folder, "data-images", "1.jpg")),
+ (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg"))),
+ (Image.open(os.path.join(test_folder, "data-images", "1.jpg"))),
+ ]
+)
+def test_recognition_model_inceptionv3(image_input):
+
+ classifier = ImageClassification()
+ classifier.setModelTypeAsInceptionV3()
+ classifier.setModelPath(os.path.join(test_folder, "data-models", "inception_v3_google-1a9a5a14.pth"))
+ classifier.loadModel()
+ predictions, probabilities = classifier.classifyImage(image_input=image_input)
+
+ assert isinstance(predictions, list)
+ assert isinstance(probabilities, list)
+ assert isinstance(predictions[0], str)
+ assert isinstance(probabilities[0], float)
+
+@pytest.mark.parametrize(
+ "image_input",
+ [
+ (os.path.join(test_folder, "data-images", "1.jpg")),
+ (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg"))),
+ (Image.open(os.path.join(test_folder, "data-images", "1.jpg"))),
+ ]
+)
+def test_recognition_model_densenet(image_input):
+
+ classifier = ImageClassification()
+ classifier.setModelTypeAsDenseNet121()
+ classifier.setModelPath(os.path.join(test_folder, "data-models", "densenet121-a639ec97.pth"))
+ classifier.loadModel()
+ predictions, probabilities = classifier.classifyImage(image_input=image_input)
+
+ assert isinstance(predictions, list)
+ assert isinstance(probabilities, list)
+ assert isinstance(predictions[0], str)
+ assert isinstance(probabilities[0], float)
\ No newline at end of file
diff --git a/test/test_object_detection.py b/test/test_object_detection.py
new file mode 100644
index 00000000..a0aebeb5
--- /dev/null
+++ b/test/test_object_detection.py
@@ -0,0 +1,305 @@
+import os, sys
+from typing import List
+import shutil
+import cv2
+import uuid
+from PIL import Image
+import numpy as np
+import pytest
+from os.path import dirname
+sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
+from imageai.Detection import ObjectDetection
+
+test_folder = dirname(os.path.abspath(__file__))
+
+
+def delete_cache(paths: List[str]):  # best-effort cleanup of test artifacts; silently skips paths that no longer exist
+    for path in paths:
+        if os.path.isfile(path):
+            os.remove(path)
+        elif os.path.isdir(path):  # extracted-objects output is a directory
+            shutil.rmtree(path)
+
+
+@pytest.mark.parametrize(
+ "input_image, output_type, extract_objects",
+ [
+ (os.path.join(test_folder, test_folder, "data-images", "1.jpg"), "file", False),
+ (os.path.join(test_folder, test_folder, "data-images", "4.jpg"), "file", False),
+ (os.path.join(test_folder, test_folder, "data-images", "1.jpg"), "file", True),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "1.jpg")), "array", False),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "1.jpg")), "array", True),
+ (Image.open(os.path.join(test_folder, test_folder, "data-images", "1.jpg")), "array", True),
+ ]
+)
+def test_object_detection_retinanet(input_image, output_type, extract_objects):
+ detector = ObjectDetection()
+ detector.setModelTypeAsRetinaNet()
+ detector.setModelPath(os.path.join(test_folder, "data-models", "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
+ detector.loadModel()
+
+ output_img_path = os.path.join(test_folder, "data-images", str(uuid.uuid4()) + ".jpg")
+
+ if output_type == "array":
+ if extract_objects:
+ output_image_array, detections, extracted_objects = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type, extract_detected_objects=extract_objects)
+
+ assert len(extracted_objects) > 1
+ for extracted_obj in extracted_objects:
+ assert type(extracted_obj) == np.ndarray
+ assert type(detections) == list
+ else:
+ output_image_array, detections = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type)
+ assert type(output_image_array) == np.ndarray
+ assert type(detections) == list
+ else:
+ if extract_objects:
+ detections, extracted_object_paths = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path, extract_detected_objects=True)
+
+ assert type(detections) == list
+ assert os.path.isfile(output_img_path)
+ assert len(extracted_object_paths) > 3
+ delete_cache(
+ extracted_object_paths
+ )
+ delete_cache(
+ [extracted_object_paths[0], output_img_path]
+ )
+ else:
+ detections = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path)
+ assert type(detections) == list
+ delete_cache(
+ [output_img_path]
+ )
+
+
+ for eachObject in detections:
+ assert type(eachObject) == dict
+ assert "name" in eachObject.keys()
+ assert type(eachObject["name"]) == str
+ assert "percentage_probability" in eachObject.keys()
+ assert type(eachObject["percentage_probability"]) == float
+ assert "box_points" in eachObject.keys()
+ assert type(eachObject["box_points"]) == list
+ box_points = eachObject["box_points"]
+ for point in box_points:
+ assert type(point) == int
+ assert box_points[0] < box_points[2]
+ assert box_points[1] < box_points[3]
+
+
+@pytest.mark.parametrize(
+ "input_image, output_type, extract_objects",
+ [
+ (os.path.join(test_folder, test_folder, "data-images", "1.jpg"), "file", False),
+ (os.path.join(test_folder, test_folder, "data-images", "4.jpg"), "file", False),
+ (os.path.join(test_folder, test_folder, "data-images", "1.jpg"), "file", True),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "1.jpg")), "array", False),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "1.jpg")), "array", True),
+ (Image.open(os.path.join(test_folder, test_folder, "data-images", "1.jpg")), "array", True),
+ ]
+)
+def test_object_detection_yolov3(input_image, output_type, extract_objects):
+ detector = ObjectDetection()
+ detector.setModelTypeAsYOLOv3()
+ detector.setModelPath(os.path.join(test_folder, "data-models", "yolov3.pt"))
+ detector.loadModel()
+
+ output_img_path = os.path.join(test_folder, "data-images", str(uuid.uuid4()) + ".jpg")
+
+ if output_type == "array":
+ if extract_objects:
+ output_image_array, detections, extracted_objects = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type, extract_detected_objects=extract_objects)
+
+ assert len(extracted_objects) > 1
+ assert type(detections) == list
+ for extracted_obj in extracted_objects:
+ assert type(extracted_obj) == np.ndarray
+ else:
+ output_image_array, detections = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type)
+ assert type(output_image_array) == np.ndarray
+ assert type(detections) == list
+ else:
+ if extract_objects:
+ detections, extracted_object_paths = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path, extract_detected_objects=True)
+
+ assert os.path.isfile(output_img_path)
+ assert len(extracted_object_paths) > 3
+ assert type(detections) == list
+ delete_cache(
+ extracted_object_paths
+ )
+ delete_cache(
+ [extracted_object_paths[0], output_img_path]
+ )
+ else:
+ detections = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path)
+ assert type(detections) == list
+ delete_cache(
+ [output_img_path]
+ )
+
+
+
+ for eachObject in detections:
+ assert type(eachObject) == dict
+ assert "name" in eachObject.keys()
+ assert type(eachObject["name"]) == str
+ assert "percentage_probability" in eachObject.keys()
+ assert type(eachObject["percentage_probability"]) == float
+ assert "box_points" in eachObject.keys()
+ assert type(eachObject["box_points"]) == list
+ box_points = eachObject["box_points"]
+ for point in box_points:
+ assert type(point) == int
+ assert box_points[0] < box_points[2]
+ assert box_points[1] < box_points[3]
+
+
+@pytest.mark.parametrize(
+ "input_image, output_type, extract_objects",
+ [
+ (os.path.join(test_folder, test_folder, "data-images", "1.jpg"), "file", False),
+ (os.path.join(test_folder, test_folder, "data-images", "4.jpg"), "file", False),
+ (os.path.join(test_folder, test_folder, "data-images", "1.jpg"), "file", True),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "1.jpg")), "array", False),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "1.jpg")), "array", True),
+ (Image.open(os.path.join(test_folder, test_folder, "data-images", "11.jpg")), "array", True),
+ ]
+)
+def test_object_detection_tiny_yolov3(input_image, output_type, extract_objects):
+ detector = ObjectDetection()
+ detector.setModelTypeAsTinyYOLOv3()
+ detector.setModelPath(os.path.join(test_folder, "data-models", "tiny-yolov3.pt"))
+ detector.loadModel()
+
+
+ output_img_path = os.path.join(test_folder, "data-images", str(uuid.uuid4()) + ".jpg")
+
+ if output_type == "array":
+ if extract_objects:
+ output_image_array, detections, extracted_objects = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type, extract_detected_objects=extract_objects)
+
+ assert len(extracted_objects) > 1
+ assert type(detections) == list
+ for extracted_obj in extracted_objects:
+ assert type(extracted_obj) == np.ndarray
+ else:
+ output_image_array, detections = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type)
+ assert type(output_image_array) == np.ndarray
+ assert type(detections) == list
+ else:
+ if extract_objects:
+ detections, extracted_object_paths = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path, extract_detected_objects=True)
+
+ assert os.path.isfile(output_img_path)
+ assert len(extracted_object_paths) > 1
+ assert type(detections) == list
+ delete_cache(
+ extracted_object_paths
+ )
+ delete_cache(
+ [extracted_object_paths[0], output_img_path]
+ )
+
+ else:
+ detections = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path)
+ assert type(detections) == list
+ delete_cache(
+ [output_img_path]
+ )
+
+
+
+ for eachObject in detections:
+ assert type(eachObject) == dict
+ assert "name" in eachObject.keys()
+ assert type(eachObject["name"]) == str
+ assert "percentage_probability" in eachObject.keys()
+ assert type(eachObject["percentage_probability"]) == float
+ assert "box_points" in eachObject.keys()
+ assert type(eachObject["box_points"]) == list
+ box_points = eachObject["box_points"]
+ for point in box_points:
+ assert type(point) == int
+ assert box_points[0] < box_points[2]
+ assert box_points[1] < box_points[3]
+
+
+@pytest.mark.parametrize(
+ "input_image",
+ [
+ (os.path.join(test_folder, test_folder, "data-images", "11.jpg")),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "11.jpg"))),
+ (Image.open(os.path.join(test_folder, test_folder, "data-images", "11.jpg"))),
+ ]
+)
+def test_object_detection_retinanet_custom_objects(input_image):
+ detector = ObjectDetection()
+ detector.setModelTypeAsRetinaNet()
+ detector.setModelPath(os.path.join(test_folder, "data-models", "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
+ detector.loadModel()
+
+ custom = detector.CustomObjects(person=True, cell_phone=True)
+
+ custom_detections = detector.detectObjectsFromImage(input_image=input_image, custom_objects=custom)
+
+ for custom_detection in custom_detections:
+ assert custom_detection["name"] in ["person", "cell phone"]
+
+ detections = detector.detectObjectsFromImage(input_image=input_image)
+
+ assert len(detections) > len(custom_detections)
+
+
+@pytest.mark.parametrize(
+ "input_image",
+ [
+ (os.path.join(test_folder, test_folder, "data-images", "11.jpg")),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "11.jpg"))),
+ (Image.open(os.path.join(test_folder, test_folder, "data-images", "11.jpg"))),
+ ]
+)
+def test_object_detection_yolov3_custom_objects(input_image):
+ detector = ObjectDetection()
+ detector.setModelTypeAsYOLOv3()
+ detector.setModelPath(os.path.join(test_folder, "data-models", "yolov3.pt"))
+ detector.loadModel()
+
+ custom = detector.CustomObjects(person=True, cell_phone=True)
+
+ custom_detections = detector.detectObjectsFromImage(input_image=input_image, custom_objects=custom)
+
+ for custom_detection in custom_detections:
+ assert custom_detection["name"] in ["person", "cell phone"]
+
+ detections = detector.detectObjectsFromImage(input_image=input_image)
+
+ assert len(detections) > len(custom_detections)
+
+
+@pytest.mark.parametrize(
+ "input_image",
+ [
+ (os.path.join(test_folder, test_folder, "data-images", "11.jpg")),
+ (cv2.imread(os.path.join(test_folder, test_folder, "data-images", "11.jpg"))),
+ (Image.open(os.path.join(test_folder, test_folder, "data-images", "11.jpg"))),
+ ]
+)
+def test_object_detection_tiny_yolov3_custom_objects(input_image):
+ detector = ObjectDetection()
+ detector.setModelTypeAsTinyYOLOv3()
+ detector.setModelPath(os.path.join(test_folder, "data-models", "tiny-yolov3.pt"))
+ detector.loadModel()
+
+ custom = detector.CustomObjects(person=True, cell_phone=True)
+
+ custom_detections = detector.detectObjectsFromImage(input_image=input_image, custom_objects=custom)
+
+ for custom_detection in custom_detections:
+ assert custom_detection["name"] in ["person", "cell phone"]
+
+ detections = detector.detectObjectsFromImage(input_image=input_image)
+
+ assert len(detections) > len(custom_detections)
+
diff --git a/test/test_video_object_detection.py b/test/test_video_object_detection.py
new file mode 100644
index 00000000..4bfa5ac9
--- /dev/null
+++ b/test/test_video_object_detection.py
@@ -0,0 +1,155 @@
+import os, sys
+from typing import List
+from numpy import ndarray
+from os.path import dirname
+from mock import patch
+sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
+
+from imageai.Detection import VideoObjectDetection
+
+
+test_folder = dirname(os.path.abspath(__file__))
+
+video_file = os.path.join(test_folder, "data-videos", "traffic-micro.mp4")
+video_file_output = os.path.join(test_folder, "data-videos", "traffic-micro-detected")
+
+
+
+class CallbackFunctions:  # patch targets for the video-analysis test; NOTE(review): methods lack `self` and are replaced by MagicMock via patch.object, so these bodies never actually execute — confirm intent
+    def forFrame(frame_number, output_array, output_count, detected_frame):  # per-frame callback signature expected by detectObjectsFromVideo
+        assert isinstance(detected_frame, ndarray)
+        assert isinstance(frame_number, int)
+        assert isinstance(output_array, list)  # one dict per detection in this frame
+        assert isinstance(output_array[0], dict)
+        assert isinstance(output_array[0]["name"], str)
+        assert isinstance(output_array[0]["percentage_probability"], float)
+        assert isinstance(output_array[0]["box_points"], list)
+
+        assert isinstance(output_count, dict)  # label -> count for this frame
+        for a_key in dict(output_count).keys():
+            assert isinstance(a_key, str)
+            assert isinstance(output_count[a_key], int)
+
+    def forSecond(second_number, output_arrays, count_arrays, average_output_count, detected_frame):  # per-second callback signature expected by detectObjectsFromVideo
+        assert isinstance(detected_frame, ndarray)
+        assert isinstance(second_number, int)
+        assert isinstance(output_arrays, list)  # list of per-frame detection lists for this second
+        assert isinstance(output_arrays[0], list)
+
+        assert isinstance(output_arrays[0][0], dict)
+        assert isinstance(output_arrays[0][0]["name"], str)
+        assert isinstance(output_arrays[0][0]["percentage_probability"], float)
+        assert isinstance(output_arrays[0][0]["box_points"], list)
+
+        assert isinstance(count_arrays, list)  # per-frame label -> count dicts
+        assert isinstance(count_arrays[0], dict)
+        for a_key in dict(count_arrays[0]).keys():
+            assert isinstance(a_key, str)
+            assert isinstance(count_arrays[0][a_key], int)
+
+        assert isinstance(average_output_count, dict)  # label -> average count over the second
+        for a_key2 in dict(average_output_count).keys():
+            assert isinstance(a_key2, str)
+            assert isinstance(average_output_count[a_key2], int)
+
+
+
+def delete_cache(files: List[str]):  # remove leftover output videos; silently skips missing files
+    for file in files:
+        if os.path.isfile(file):
+            os.remove(file)
+
+
+def test_video_detection_retinanet():  # end-to-end video detection with RetinaNet: asserts an .mp4 is produced and its path returned
+
+    delete_cache([video_file_output + ".mp4"])  # clean slate in case an earlier run crashed before cleanup
+
+    detector = VideoObjectDetection()
+    detector.setModelTypeAsRetinaNet()
+    detector.setModelPath(model_path=os.path.join(test_folder, "data-models", "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
+    detector.loadModel()
+    video_path = detector.detectObjectsFromVideo(input_file_path=video_file, output_file_path=video_file_output, save_detected_video=True, frames_per_second=30, log_progress=True)
+
+    assert os.path.exists(video_file_output + ".mp4")  # library appends the .mp4 extension itself
+    assert isinstance(video_path, str)
+
+    delete_cache([video_file_output + ".mp4"])
+
+
+def test_video_detection_retinanet_custom_objects():  # video detection restricted to person/bus via CustomObjects; only output existence is asserted
+
+    delete_cache([video_file_output + ".mp4"])  # clean slate in case an earlier run crashed before cleanup
+
+    detector = VideoObjectDetection()
+    detector.setModelTypeAsRetinaNet()
+    detector.setModelPath(model_path=os.path.join(test_folder, "data-models", "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
+    detector.loadModel()
+
+    custom_objects = detector.CustomObjects(
+        person=True,
+        bus=True
+    )
+
+    video_path = detector.detectObjectsFromVideo(input_file_path=video_file, output_file_path=video_file_output, save_detected_video=True, frames_per_second=30, log_progress=True, custom_objects=custom_objects)
+
+    assert os.path.exists(video_file_output + ".mp4")  # library appends the .mp4 extension itself
+    assert isinstance(video_path, str)
+
+    delete_cache([video_file_output + ".mp4"])
+
+
+
+
+def test_video_detection_yolov3():  # end-to-end video detection with YOLOv3: asserts an .mp4 is produced and its path returned
+    delete_cache([video_file_output + ".mp4"])  # clean slate in case an earlier run crashed before cleanup
+
+    detector = VideoObjectDetection()
+    detector.setModelTypeAsYOLOv3()
+    detector.setModelPath(model_path=os.path.join(test_folder, "data-models", "yolov3.pt"))
+    detector.loadModel()
+    video_path = detector.detectObjectsFromVideo(input_file_path=video_file, output_file_path=video_file_output, save_detected_video=True, frames_per_second=30, log_progress=True)
+
+    assert os.path.exists(video_file_output + ".mp4")  # library appends the .mp4 extension itself
+    assert isinstance(video_path, str)
+
+    delete_cache([video_file_output + ".mp4"])
+
+
+
+def test_video_detection_tiny_yolov3():  # end-to-end video detection with TinyYOLOv3: asserts an .mp4 is produced and its path returned
+    delete_cache([video_file_output + ".mp4"])  # clean slate in case an earlier run crashed before cleanup
+
+    detector = VideoObjectDetection()
+    detector.setModelTypeAsTinyYOLOv3()
+    detector.setModelPath(model_path=os.path.join(test_folder, "data-models", "tiny-yolov3.pt"))
+    detector.loadModel()
+    video_path = detector.detectObjectsFromVideo(input_file_path=video_file, output_file_path=video_file_output, save_detected_video=True, frames_per_second=30, log_progress=True)
+
+    assert os.path.exists(video_file_output + ".mp4")  # library appends the .mp4 extension itself
+    assert isinstance(video_path, str)
+
+    delete_cache([video_file_output + ".mp4"])
+
+
+def test_video_detection_retinanet_analysis():  # verifies the per-frame/per-second callbacks are invoked during video detection
+    delete_cache([video_file_output + ".mp4"])  # clean slate in case an earlier run crashed before cleanup
+
+    detector = VideoObjectDetection()
+    detector.setModelTypeAsRetinaNet()
+    detector.setModelPath(model_path=os.path.join(test_folder, "data-models", "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
+    detector.loadModel()
+
+    with patch.object(CallbackFunctions, 'forFrame') as frameFunc:  # NOTE(review): patch.object replaces the methods with MagicMocks, so the assertions inside CallbackFunctions never run — only invocation is checked
+        with patch.object(CallbackFunctions, 'forSecond') as secondFunc:
+
+            video_path = detector.detectObjectsFromVideo(input_file_path=video_file, output_file_path=video_file_output, save_detected_video=True, frames_per_second=30, log_progress=True, per_frame_function=frameFunc, per_second_function=secondFunc, return_detected_frame=True)
+
+            assert os.path.exists(video_file_output + ".mp4")
+            assert isinstance(video_path, str)
+
+            frameFunc.assert_called()  # detector must call the per-frame hook at least once
+            secondFunc.assert_called()  # and the per-second hook at least once
+
+    delete_cache([video_file_output + ".mp4"])
+
+
diff --git a/theiaengine.png b/theiaengine.png
new file mode 100644
index 00000000..cbc3a4f2
Binary files /dev/null and b/theiaengine.png differ
diff --git a/videos/Thumbs.db b/videos/Thumbs.db
deleted file mode 100644
index 3b026f51..00000000
Binary files a/videos/Thumbs.db and /dev/null differ