diff --git a/.all-contributorsrc b/.all-contributorsrc
index 6c094b5..d653e6a 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -70,6 +70,28 @@
"contributions": [
"ideas"
]
+ },
+ {
+ "login": "Xuanwo",
+ "name": "Xuanwo",
+ "avatar_url": "https://avatars.githubusercontent.com/u/5351546?v=4",
+ "profile": "https://xuanwo.io/",
+ "contributions": [
+ "content",
+ "design",
+ "ideas"
+ ]
+ },
+ {
+ "login": "Zheaoli",
+ "name": "Nadeshiko Manju",
+ "avatar_url": "https://avatars.githubusercontent.com/u/7054676?v=4",
+ "profile": "http://manjusaka.itscoder.com/",
+ "contributions": [
+ "bug",
+ "design",
+ "ideas"
+ ]
}
],
"contributorsPerLine": 7
diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 684f87e..ce8938b 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -21,6 +21,17 @@ concurrency:
cancel-in-progress: true
jobs:
+ typos-check:
+ name: Spell Check with Typos
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Actions Repository
+ uses: actions/checkout@v3
+ - name: Check spelling with custom config file
+ uses: crate-ci/typos@v1.16.2
+ with:
+ config: ./typos.toml
+
test:
name: test
strategy:
diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
index c552033..cb2a31c 100644
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -1,9 +1,8 @@
name: release
on:
- push:
- tags:
- - 'v*'
+ release:
+ types: [published]
pull_request:
paths:
- '.github/workflows/release.yml'
@@ -36,3 +35,49 @@ jobs:
args: release --rm-dist
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ - name: upload gobin
+ uses: actions/upload-artifact@v3
+ with:
+ name: gobin_${{ github.event.release.tag_name }}
+ retention-days: 1
+ path: |
+ dist/mdz_linux_amd64_v1/mdz
+ if-no-files-found: error
+ pypi_publish:
+ needs: goreleaser
+ # only trigger on main repo when tag starts with v
+ if: github.repository == 'tensorchord/openmodelz' && startsWith(github.ref, 'refs/tags/v')
+ runs-on: ${{ matrix.os }}
+ timeout-minutes: 20
+ strategy:
+ matrix:
+ os: [ubuntu-20.04]
+ steps:
+ - uses: actions/checkout@v3
+ - name: Get gobin
+ uses: actions/download-artifact@v3
+ with:
+ name: gobin_${{ github.event.release.tag_name }}
+ path: dist/
+ - name: Configure linux build environment
+ if: runner.os == 'Linux'
+ run: |
+ mkdir -p mdz/bin
+ mv dist/mdz mdz/bin/mdz
+ chmod +x mdz/bin/mdz
+ - name: Build wheels
+ uses: pypa/cibuildwheel@v2.14.1
+ - name: Build source distribution
+ if: runner.os == 'Linux' # Only release source under linux to avoid conflict
+ run: |
+ python -m pip install wheel setuptools_scm
+ python setup.py sdist
+ mv dist/*.tar.gz wheelhouse/
+ - name: Upload to PyPI
+ env:
+ TWINE_USERNAME: __token__
+ TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install twine
+ python -m twine upload wheelhouse/*
diff --git a/.gitignore b/.gitignore
index 849ddff..b40678e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,186 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+_version.txt
+_version.py
+wheelhouse/
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+.ruff_cache/
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+
+# If you prefer the allow list template instead of the deny list, see community template:
+# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
+#
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Test binary, built with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Dependency directories (remove the comment below to include it)
+# vendor/
+
+# Go workspace file
+go.work
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..a8abe23
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,15 @@
+prune autoscaler
+prune ingress-operator
+prune modelzetes
+prune .github
+include LICENSE
+include README.md
+include .goreleaser.yaml
+include mdz/Makefile mdz/go.mod mdz/go.sum mdz/LICENSE
+graft mdz/pkg
+graft mdz/cmd
+prune mdz/bin
+prune mdz/docs
+prune mdz/examples
+graft agent/pkg
+prune agent/bin
diff --git a/README.md b/README.md
index f34ff43..9cdfb91 100644
--- a/README.md
+++ b/README.md
@@ -2,19 +2,16 @@
# OpenModelZ
-Simplify machine learning deployment for any environment.
-
+One-click machine learning deployment at scale on any cluster (GCP, AWS, Lambda labs, your home lab, or even a single machine)
+
-
-OpenModelZ (MDZ) provides a simple CLI to deploy and manage your machine learning workloads on any cloud or home lab.
-
## Why use OpenModelZ
OpenModelZ is the ideal solution for practitioners who want to quickly deploy their machine learning models to a (public or private) endpoint without the hassle of spending excessive time, money, and effort to figure out the entire end-to-end process.
@@ -27,28 +24,44 @@ We created OpenModelZ in response to the difficulties of finding a simple, cost-
With OpenModelZ, we take care of the underlying technical details for you, and provide a simple and easy-to-use CLI to deploy your models to **any cloud (GCP, AWS, or others), your home lab, or even a single machine**.
-You could **start from a single machine and scale it up to a cluster of machines** without any hassle. OpenModelZ lies at the heart of our [ModelZ](https://modelz.ai), which is a serverless inference platform. It's used in production to deploy models for our customers.
+You could **start from a single machine and scale it up to a cluster of machines** without any hassle. Besides this, we **provision a separate subdomain for each deployment** without any extra cost or effort, making each deployment easily accessible from the outside.
+
+OpenModelZ forms the core of our [ModelZ](https://modelz.ai) platform, which is a serverless machine learning inference service. It is utilized in a production environment to provision models for our clients.
## Quick Start 🚀
-Once you've installed the `mdz` you can start deploying models and experimenting with them.
+### Install `mdz`
-There are only two concepts in `mdz`:
+You can install OpenModelZ using the following command:
-- **Deployment**: A deployment is a running inference service. You could configure the number of replicas, the port, and the image, and some other parameters.
-- **Server**: A server is a machine that could run the deployments. It could be a cloud VM, a PC, or even a Raspberry Pi. You could start from a single server and scale it up to a cluster of machines without any hassle.
+```text copy
+pip install openmodelz
+```
+
+You could verify the installation by running the following command:
+
+```text copy
+mdz
+```
+
+Once you've installed the `mdz` you can start deploying models and experimenting with them.
### Bootstrap `mdz`
-It's super easy to bootstrap the `mdz` server. You just need to find a server (could be a cloud VM, a home lab, or even a single machine) and run the `mdz server start` command. The `mdz` server will be bootstrapped on the server and you could start deploying your models.
+It's super easy to bootstrap the `mdz` server. You just need to find a server (could be a cloud VM, a home lab, or even a single machine) and run the `mdz server start` command.
+
+> Notice: We may require root permission to bootstrap the `mdz` server on port 80.
```
$ mdz server start
+🚧 Creating the server...
+🚧 Initializing the load balancer...
+🚧 Initializing the GPU resource...
🚧 Initializing the server...
🚧 Waiting for the server to be ready...
🐋 Checking if the server is running...
Agent:
- Version: v0.0.5
+ Version: v0.0.13
Build Date: 2023-07-19T09:12:55Z
Git Commit: 84d0171640453e9272f78a63e621392e93ef6bbb
Git State: clean
@@ -58,7 +71,7 @@ Agent:
🐳 The server is running at http://192.168.71.93.modelz.live
🎉 You could set the environment variable to get started!
-export MDZ_AGENT=http://192.168.71.93.modelz.live
+export MDZ_URL=http://192.168.71.93.modelz.live
```
The internal IP address will be used as the default endpoint of your deployments. You could provide the public IP address of your server to the `mdz server start` command to make it accessible from the outside world.
@@ -68,36 +81,78 @@ The internal IP address will be used as the default endpoint of your deployments
$ mdz server start 1.2.3.4
```
-### Create your first deployment
+You could also specify the registry mirror to speed up the image pulling process. Here is an example:
+
+```bash /--mirror-endpoints/
+$ mdz server start --mirror-endpoints https://docker.mirrors.sjtug.sjtu.edu.cn
+```
+
+### Create your first UI-based deployment
-Once you've bootstrapped the `mdz` server, you can start deploying your first applications.
+Once you've bootstrapped the `mdz` server, you can start deploying your first applications. We will use jupyter notebook as an example in this tutorial. You could use any docker image as your deployment.
+```text
+$ mdz deploy --image jupyter/minimal-notebook:lab-4.0.3 --name jupyter --port 8888 --command "jupyter notebook --ip='*' --NotebookApp.token='' --NotebookApp.password=''"
+Inference jupyter is created
+$ mdz list
+ NAME ENDPOINT STATUS INVOCATIONS REPLICAS
+ jupyter http://jupyter-9pnxdkeb6jsfqkmq.192.168.71.93.modelz.live Ready 488 1/1
+ http://192.168.71.93/inference/jupyter.default
```
-$ mdz deploy --image aikain/simplehttpserver:0.1 --name simple-server --port 80
+
+You could access the deployment by visiting the endpoint URL. The endpoint will be automatically generated for each deployment with the following format: `<name>-<random-string>.<ip>.modelz.live`.
+
+It is `http://jupyter-9pnxdkeb6jsfqkmq.192.168.71.93.modelz.live` in this case. The endpoint could be accessed from the outside world as well if you've provided the public IP address of your server to the `mdz server start` command.
+
+![jupyter notebook](./images/jupyter.png)
+
+### Create your first OpenAI compatible API server
+
+You could also create API-based deployments. We will use [OpenAI compatible API server with Bloomz 560M](https://github.com/tensorchord/modelz-llm#run-the-self-hosted-api-server) as an example in this tutorial.
+
+```text
+$ mdz deploy --image modelzai/llm-bloomz-560m:23.07.4 --name simple-server
Inference simple-server is created
$ mdz list
- NAME ENDPOINT STATUS INVOCATIONS REPLICAS
- simple-server http://simple-server-4k2epq5lynxbaayn.192.168.71.93.modelz.live Ready 2 1/1
- http://192.168.71.93.modelz.live/inference/simple-server.default
+ NAME ENDPOINT STATUS INVOCATIONS REPLICAS
+ jupyter http://jupyter-9pnxdkeb6jsfqkmq.192.168.71.93.modelz.live Ready 488 1/1
+ http://192.168.71.93/inference/jupyter.default
+ simple-server http://simple-server-lagn8m9m8648q6kx.192.168.71.93.modelz.live Ready 0 1/1
+ http://192.168.71.93/inference/simple-server.default
```
-You could access the deployment by visiting the endpoint URL. It will be `http://simple-server-4k2epq5lynxbaayn.192.168.71.93.modelz.live` in this case. The endpoint could be accessed from the outside world as well if you've provided the public IP address of your server to the `mdz server start` command.
+You could use OpenAI python package and the endpoint `http://simple-server-lagn8m9m8648q6kx.192.168.71.93.modelz.live` in this case, to interact with the deployment.
+
+```python
+import openai
+openai.api_base="http://simple-server-lagn8m9m8648q6kx.192.168.71.93.modelz.live"
+openai.api_key="any"
+
+# create a chat completion
+chat_completion = openai.ChatCompletion.create(model="bloomz", messages=[
+ {"role": "user", "content": "Who are you?"},
+ {"role": "assistant", "content": "I am a student"},
+ {"role": "user", "content": "What do you learn?"},
+], max_tokens=100)
+```
### Scale your deployment
You could scale your deployment by using the `mdz scale` command.
-```bash
+```text /scale/
$ mdz scale simple-server --replicas 3
```
-The requests will be load balanced between the replicas of your deployment.
+The requests will be load balanced between the replicas of your deployment.
+
+You could also tell the `mdz` to **autoscale your deployment** based on the inflight requests. Please check out the [Autoscaling](https://docs.open.modelz.ai/deployment/autoscale) documentation for more details.
### Debug your deployment
Sometimes you may want to debug your deployment. You could use the `mdz logs` command to get the logs of your deployment.
-```bash
+```text /logs/
$ mdz logs simple-server
simple-server-6756dd67ff-4bf4g: 10.42.0.1 - - [27/Jul/2023 02:32:16] "GET / HTTP/1.1" 200 -
simple-server-6756dd67ff-4bf4g: 10.42.0.1 - - [27/Jul/2023 02:32:16] "GET / HTTP/1.1" 200 -
@@ -106,22 +161,23 @@ simple-server-6756dd67ff-4bf4g: 10.42.0.1 - - [27/Jul/2023 02:32:17] "GET / HTTP
You could also use the `mdz exec` command to execute a command in the container of your deployment. You do not need to ssh into the server to do that.
-```
+```text /exec/
$ mdz exec simple-server ps
PID USER TIME COMMAND
1 root 0:00 /usr/bin/dumb-init /bin/sh -c python3 -m http.server 80
7 root 0:00 /bin/sh -c python3 -m http.server 80
8 root 0:00 python3 -m http.server 80
9 root 0:00 ps
+```
+
+```text /exec/
$ mdz exec simple-server -ti bash
-bash-4.4# uname -r
-5.19.0-46-generic
bash-4.4#
```
Or you could port-forward the deployment to your local machine and debug it locally.
-```
+```text /port-forward/
$ mdz port-forward simple-server 7860
Forwarding inference simple-server to local port 7860
```
@@ -130,28 +186,27 @@ Forwarding inference simple-server to local port 7860
You could add more servers to your cluster by using the `mdz server join` command. The `mdz` server will be bootstrapped on the server and join the cluster automatically.
-```
+```text /join/
+$ mdz server join
$ mdz server list
NAME PHASE ALLOCATABLE CAPACITY
node1 Ready cpu: 16 cpu: 16
mem: 32784748Ki mem: 32784748Ki
+ gpu: 1 gpu: 1
node2 Ready cpu: 16 cpu: 16
mem: 32784748Ki mem: 32784748Ki
+ gpu: 1 gpu: 1
```
### Label your servers
You could label your servers to deploy your models to specific servers. For example, you could label your servers with `gpu=true` and deploy your models to servers with GPUs.
-```
+```text /--node-labels gpu=true,type=nvidia-a100/
$ mdz server label node3 gpu=true type=nvidia-a100
-$ mdz deploy --image aikain/simplehttpserver:0.1 --name simple-server --port 80 --node-labels gpu=true,type=nvidia-a100
+$ mdz deploy ... --node-labels gpu=true,type=nvidia-a100
```
-## More on documentation 📝
-
-See [OpenModelZ documentation](https://docs.open.modelz.ai/).
-
## Roadmap 🗂️
Please checkout [ROADMAP](https://docs.open.modelz.ai/community).
@@ -173,8 +228,12 @@ We welcome all kinds of contributions from the open-source community, individual
Ce Gao 💻 👀 ✅ |
Jinjing Zhou 💬 🐛 🤔 |
Keming 💻 🎨 🚇 |
+ Nadeshiko Manju 🐛 🎨 🤔 |
Teddy Xinyuan Chen 📖 |
+ Xuanwo 🖋 🎨 🤔 |
cutecutecat 🤔 |
+
+
xieydd 🤔 |
diff --git a/agent/README.md b/agent/README.md
index 2f7d274..c3eeaf6 100644
--- a/agent/README.md
+++ b/agent/README.md
@@ -1,8 +1,20 @@
+
+
# OpenModelZ Agent
-TODO:
+
+
+
+
+
+
+
+## Installation
+
+```
+pip install openmodelz
+```
+
+## Architecture
-- Make resolver optional
-- proxy with use case
-- mdz init
-- ai lab
+Please check out [Architecture](https://docs.open.modelz.ai/architecture) documentation.
diff --git a/agent/client/log.go b/agent/client/log.go
index e35fc49..13c60fd 100644
--- a/agent/client/log.go
+++ b/agent/client/log.go
@@ -12,13 +12,16 @@ import (
"net/url"
"strings"
+ "github.com/sirupsen/logrus"
"github.com/tensorchord/openmodelz/agent/api/types"
)
+const LogBufferSize = 128
+
// DeploymentLogGet gets the deployment logs.
func (cli *Client) DeploymentLogGet(ctx context.Context, namespace, name string,
- since string, tail int, end string) (
- []types.Message, error) {
+ since string, tail int, end string, follow bool) (
+ <-chan types.Message, error) {
urlValues := url.Values{}
urlValues.Add("namespace", namespace)
urlValues.Add("name", name)
@@ -35,26 +38,34 @@ func (cli *Client) DeploymentLogGet(ctx context.Context, namespace, name string,
urlValues.Add("tail", fmt.Sprintf("%d", tail))
}
- resp, err := cli.get(ctx, "/system/logs/inference", urlValues, nil)
- defer ensureReaderClosed(resp)
+ if follow {
+ urlValues.Add("follow", "true")
+ }
+ resp, err := cli.get(ctx, "/system/logs/inference", urlValues, nil)
+
if err != nil {
- return nil,
- wrapResponseError(err, resp, "deployment logs", name)
+ return nil, wrapResponseError(err, resp, "deployment logs", name)
}
-
+
+ stream := make(chan types.Message, LogBufferSize)
var log types.Message
- logs := []types.Message{}
scanner := bufio.NewScanner(resp.body)
- for scanner.Scan() {
- err = json.NewDecoder(strings.NewReader(scanner.Text())).Decode(&log)
- if err != nil {
- return nil, wrapResponseError(err, resp, "deployment logs", name)
+ go func () {
+ defer ensureReaderClosed(resp)
+ defer close(stream)
+ for scanner.Scan() {
+ err = json.Unmarshal(scanner.Bytes(), &log)
+ if err != nil {
+ logrus.Warnf("failed to decode %s log: %v | %s | [%s]", name, err, scanner.Text(), scanner.Err())
+ return
+ // continue
+ }
+ stream <- log
}
- logs = append(logs, log)
- }
+ }()
- return logs, err
+ return stream, err
}
func (cli *Client) BuildLogGet(ctx context.Context, namespace, name, since string,
diff --git a/agent/pkg/app/config.go b/agent/pkg/app/config.go
index 19b41f8..73ab589 100644
--- a/agent/pkg/app/config.go
+++ b/agent/pkg/app/config.go
@@ -11,7 +11,7 @@ func configFromCLI(c *cli.Context) config.Config {
// server
cfg.Server.Dev = c.Bool(flagDev)
- cfg.Server.ServerPort = c.Int(flageServerPort)
+ cfg.Server.ServerPort = c.Int(flagServerPort)
cfg.Server.ReadTimeout = c.Duration(flagServerReadTimeout)
cfg.Server.WriteTimeout = c.Duration(flagServerWriteTimeout)
diff --git a/agent/pkg/app/root.go b/agent/pkg/app/root.go
index e6bd2d8..871a326 100644
--- a/agent/pkg/app/root.go
+++ b/agent/pkg/app/root.go
@@ -21,7 +21,7 @@ const (
flagDev = "dev"
// server
- flageServerPort = "server-port"
+ flagServerPort = "server-port"
flagServerReadTimeout = "server-read-timeout"
flagServerWriteTimeout = "server-write-timeout"
@@ -89,7 +89,7 @@ func New() App {
Usage: "enable development mode",
},
&cli.IntFlag{
- Name: flageServerPort,
+ Name: flagServerPort,
Value: 8081,
Usage: "port to listen on",
EnvVars: []string{"MODELZ_AGENT_SERVER_PORT"},
diff --git a/agent/pkg/log/k8s.go b/agent/pkg/log/k8s.go
index 0914ef7..417fbb4 100644
--- a/agent/pkg/log/k8s.go
+++ b/agent/pkg/log/k8s.go
@@ -2,7 +2,6 @@ package log
import (
"bufio"
- "bytes"
"context"
"fmt"
"io"
@@ -63,6 +62,9 @@ func (k *K8sAPIRequestor) Query(ctx context.Context,
if err != nil {
return nil, errdefs.InvalidParameter(err)
}
+ } else if r.Follow {
+ // avoid truncate
+ endTime = time.Now().Add(time.Hour)
} else {
endTime = time.Now()
}
@@ -152,7 +154,7 @@ func podLogs(ctx context.Context, i v1.PodInterface, pod, container,
opts.SinceSeconds = parseSince(since)
}
- stream, err := i.GetLogs(pod, opts).Stream(context.TODO())
+ stream, err := i.GetLogs(pod, opts).Stream(ctx)
if err != nil {
return err
}
@@ -160,14 +162,9 @@ func podLogs(ctx context.Context, i v1.PodInterface, pod, container,
done := make(chan error)
go func() {
- reader := bufio.NewReader(stream)
- for {
- line, err := reader.ReadBytes('\n')
- if err != nil {
- done <- err
- return
- }
- msg, ts := extractTimestampAndMsg(string(bytes.Trim(line, "\x00")))
+ scanner := bufio.NewScanner(stream)
+ for scanner.Scan() {
+ msg, ts := extractTimestampAndMsg(scanner.Text())
dst <- types.Message{
Timestamp: ts,
Text: msg,
@@ -176,13 +173,19 @@ func podLogs(ctx context.Context, i v1.PodInterface, pod, container,
Namespace: namespace,
}
}
+ if err := scanner.Err(); err != nil {
+ done <- err
+ return
+ }
}()
select {
case <-ctx.Done():
+ logrus.Debug("get-log context cancelled")
return ctx.Err()
case err := <-done:
if err != io.EOF {
+ logrus.Debugf("failed to read from pod log: %v", err)
return err
}
return nil
diff --git a/agent/pkg/prom/prometheus_query.go b/agent/pkg/prom/prometheus_query.go
index 0019f48..2b293bf 100644
--- a/agent/pkg/prom/prometheus_query.go
+++ b/agent/pkg/prom/prometheus_query.go
@@ -77,7 +77,7 @@ func (q PrometheusQuery) Fetch(query string) (*VectorQueryResponse, error) {
unmarshalErr := json.Unmarshal(bytesOut, &values)
if unmarshalErr != nil {
- return nil, fmt.Errorf("error unmarshaling result: %s, '%s'", unmarshalErr, string(bytesOut))
+ return nil, fmt.Errorf("error unmarshalling result: %s, '%s'", unmarshalErr, string(bytesOut))
}
return &values, nil
diff --git a/agent/pkg/runtime/util_domain.go b/agent/pkg/runtime/util_domain.go
index 3fac331..bc0429a 100644
--- a/agent/pkg/runtime/util_domain.go
+++ b/agent/pkg/runtime/util_domain.go
@@ -11,7 +11,7 @@ const (
)
const (
- // stdLen is a standard length of uniuri string to achive ~95 bits of entropy.
+ // stdLen is a standard length of uniuri string to achieve ~95 bits of entropy.
stdLen = 16
)
diff --git a/agent/pkg/server/handler_inference_logs.go b/agent/pkg/server/handler_inference_logs.go
index 5c19035..a0fc630 100644
--- a/agent/pkg/server/handler_inference_logs.go
+++ b/agent/pkg/server/handler_inference_logs.go
@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"net/http"
+ "time"
"github.com/cockroachdb/errors"
"github.com/gin-gonic/gin"
@@ -46,8 +47,13 @@ func (s Server) getLogsFromRequester(c *gin.Context, requester log.Requester) er
}
_ = cn
- ctx, cancelQuery := context.WithTimeout(c.Request.Context(),
- s.config.Inference.LogTimeout)
+ timeout := s.config.Inference.LogTimeout
+ if req.Follow {
+ // use a much larger timeout for streaming log
+ timeout = time.Hour
+ }
+
+ ctx, cancelQuery := context.WithTimeout(c.Request.Context(), timeout)
defer cancelQuery()
messages, err := requester.Query(ctx, req)
diff --git a/agent/pkg/server/handler_root.go b/agent/pkg/server/handler_root.go
new file mode 100644
index 0000000..0fc6039
--- /dev/null
+++ b/agent/pkg/server/handler_root.go
@@ -0,0 +1,16 @@
+package server
+
+import (
+ "github.com/gin-gonic/gin"
+ "github.com/tensorchord/openmodelz/agent/pkg/server/static"
+)
+
+func (s *Server) handleRoot(c *gin.Context) error {
+ lp, err := static.RenderLoadingPage()
+ if err != nil {
+ return err
+ }
+
+ c.Data(200, "text/html; charset=utf-8", lp.Bytes())
+ return nil
+}
diff --git a/agent/pkg/server/server_init_route.go b/agent/pkg/server/server_init_route.go
index 4fbd0f9..9ba4ca1 100644
--- a/agent/pkg/server/server_init_route.go
+++ b/agent/pkg/server/server_init_route.go
@@ -38,6 +38,9 @@ func (s *Server) registerRoutes() {
// healthz
root.GET(endpointHealthz, WrapHandler(s.handleHealthz))
+ // landing page
+ root.GET("/", WrapHandler(s.handleRoot))
+
// control plane
controlPlane := root.Group("/system")
// inferences
diff --git a/agent/pkg/server/static/index.html b/agent/pkg/server/static/index.html
new file mode 100644
index 0000000..04d48d7
--- /dev/null
+++ b/agent/pkg/server/static/index.html
@@ -0,0 +1,339 @@
+
+
+
+
+
+
+
+ OpenModelZ Serving | Running
+
+
+
+
+
+
+
+ OpenModelZ server is running
+ Version: {{.Version}}
+
+
+
+ Please check out the documentation
+ for the next steps.
+
+ Please contact us
+ on discord if there is any issue.
+
+
+
+
+
+
+
diff --git a/agent/pkg/server/static/landing.go b/agent/pkg/server/static/landing.go
new file mode 100644
index 0000000..5b49de3
--- /dev/null
+++ b/agent/pkg/server/static/landing.go
@@ -0,0 +1,34 @@
+package static
+
+import (
+ "bytes"
+ _ "embed"
+ "html/template"
+
+ "github.com/tensorchord/openmodelz/agent/pkg/version"
+)
+
+//go:embed index.html
+var htmlTemplate string
+
+type htmlStruct struct {
+ Version string
+}
+
+func RenderLoadingPage() (*bytes.Buffer, error) {
+ tmpl, err := template.New("root").Parse(htmlTemplate)
+ if err != nil {
+ return nil, err
+ }
+
+ data := htmlStruct{
+ Version: version.GetAgentVersion(),
+ }
+
+ var buffer bytes.Buffer
+ if err := tmpl.Execute(&buffer, data); err != nil {
+ return nil, err
+ }
+
+ return &buffer, nil
+}
diff --git a/agent/pkg/version/version.go b/agent/pkg/version/version.go
index 20deaef..ca631b7 100644
--- a/agent/pkg/version/version.go
+++ b/agent/pkg/version/version.go
@@ -63,8 +63,8 @@ func SetGitTagForE2ETest(tag string) {
gitTag = tag
}
-// GetEnvdVersion gets Envd version information
-func GetEnvdVersion() string {
+// GetAgentVersion gets the agent version information
+func GetAgentVersion() string {
var versionStr string
if gitCommit != "" && gitTag != "" &&
@@ -95,7 +95,7 @@ func GetEnvdVersion() string {
// GetVersion returns the version information
func GetVersion() Version {
return Version{
- Version: GetEnvdVersion(),
+ Version: GetAgentVersion(),
BuildDate: buildDate,
GitCommit: gitCommit,
GitTag: gitTag,
diff --git a/autoscaler/pkg/autoscaler/scaler.go b/autoscaler/pkg/autoscaler/scaler.go
index cd05c00..51d3598 100644
--- a/autoscaler/pkg/autoscaler/scaler.go
+++ b/autoscaler/pkg/autoscaler/scaler.go
@@ -306,7 +306,7 @@ func (s *Scaler) AutoScale(interval time.Duration) {
"service": service,
"replicas": totalReplicas,
"expectedReplicas": expectedReplicas,
- "availabelReplicas": availableReplicas,
+ "availableReplicas": availableReplicas,
"currentLoad": lc.CurrentLoad,
"targetLoad": targetLoad,
"zeroDuration": zeroDuration,
diff --git a/autoscaler/pkg/prom/prom.go b/autoscaler/pkg/prom/prom.go
index 4436544..794c590 100644
--- a/autoscaler/pkg/prom/prom.go
+++ b/autoscaler/pkg/prom/prom.go
@@ -64,7 +64,7 @@ func (q PrometheusQuery) Fetch(query string) (*VectorQueryResponse, error) {
unmarshalErr := json.Unmarshal(bytesOut, &values)
if unmarshalErr != nil {
- return nil, fmt.Errorf("error unmarshaling result: %s, '%s'", unmarshalErr, string(bytesOut))
+ return nil, fmt.Errorf("error unmarshalling result: %s, '%s'", unmarshalErr, string(bytesOut))
}
return &values, nil
diff --git a/images/jupyter.png b/images/jupyter.png
new file mode 100644
index 0000000..ea10f0f
Binary files /dev/null and b/images/jupyter.png differ
diff --git a/ingress-operator/LICENSE b/ingress-operator/LICENSE
index 3804aa4..d53a9a9 100644
--- a/ingress-operator/LICENSE
+++ b/ingress-operator/LICENSE
@@ -1,5 +1,6 @@
MIT License
+Copyright (c) 2023 TensorChord Inc.
Copyright (c) 2017-2019 OpenFaaS Author(s)
Permission is hereby granted, free of charge, to any person obtaining a copy
diff --git a/ingress-operator/pkg/controller/v1/controller_test.go b/ingress-operator/pkg/controller/v1/controller_test.go
index a38fd4b..b6ccee4 100644
--- a/ingress-operator/pkg/controller/v1/controller_test.go
+++ b/ingress-operator/pkg/controller/v1/controller_test.go
@@ -152,7 +152,7 @@ func Test_makeRules_Traefik_NestedPath_TrimsRegex_And_TrailingSlash(t *testing.T
}
}
-func Test_makTLS(t *testing.T) {
+func Test_makeTLS(t *testing.T) {
cases := []struct {
name string
diff --git a/mdz/LICENSE b/mdz/LICENSE
deleted file mode 100644
index e69de29..0000000
diff --git a/mdz/Makefile b/mdz/Makefile
index 92c273b..2f73a9e 100644
--- a/mdz/Makefile
+++ b/mdz/Makefile
@@ -109,12 +109,12 @@ export GOFLAGS ?= -count=1
.DEFAULT_GOAL:=build
-build: build-local ## Build the release version of envd
+build: build-release ## Build the release version
help: ## Display this help
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z0-9_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
-debug: debug-local ## Build the debug version of envd
+debug: debug-local ## Build the debug version
# more info about `GOGC` env: https://github.com/golangci/golangci-lint#memory-usage-of-golangci-lint
lint: $(GOLANGCI_LINT) ## Lint GO code
@@ -138,11 +138,22 @@ swag-install:
build-local:
@for target in $(TARGETS); do \
- CGO_ENABLED=$(CGO_ENABLED) GOOS=$(GOOS) GOARCH=$(GOARCH) go build -tags $(DASHBOARD_BUILD) -trimpath -v -o $(OUTPUT_DIR)/$${target} \
+ CGO_ENABLED=$(CGO_ENABLED) GOOS=$(GOOS) GOARCH=$(GOARCH) go build -trimpath -v -o $(OUTPUT_DIR)/$${target} \
-ldflags "-s -w -X $(ROOT)/pkg/version.version=$(VERSION) -X $(ROOT)/pkg/version.buildDate=$(BUILD_DATE) -X $(ROOT)/pkg/version.gitCommit=$(GIT_COMMIT) -X $(ROOT)/pkg/version.gitTreeState=$(GIT_TREE_STATE)" \
$(CMD_DIR)/$${target}; \
done
+build-release:
+ @for target in $(TARGETS); do \
+ CGO_ENABLED=$(CGO_ENABLED) go build -trimpath -o $(OUTPUT_DIR)/$${target} \
+ -ldflags "-s -w -X $(ROOT)/pkg/version.version=$(VERSION) \
+ -X $(ROOT)/pkg/version.buildDate=$(BUILD_DATE) \
+ -X $(ROOT)/pkg/version.gitCommit=$(GIT_COMMIT) \
+ -X $(ROOT)/pkg/version.gitTreeState=$(GIT_TREE_STATE) \
+ -X $(ROOT)/pkg/version.gitTag=$(GIT_TAG)" \
+ $(CMD_DIR)/$${target}; \
+ done
+
# It is used by vscode to attach into the process.
debug-local:
@for target in $(TARGETS); do \
diff --git a/mdz/README.md b/mdz/README.md
index a220607..6996112 100644
--- a/mdz/README.md
+++ b/mdz/README.md
@@ -13,7 +13,9 @@ CLI for OpenModelZ.
## Installation
-TODO
+```
+pip install openmodelz
+```
## CLI Reference
diff --git a/mdz/go.mod b/mdz/go.mod
index c83dd38..b8fbbdb 100644
--- a/mdz/go.mod
+++ b/mdz/go.mod
@@ -70,6 +70,8 @@ require (
github.com/rivo/uniseg v0.2.0 // indirect
github.com/rogpeppe/go-internal v1.10.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
+ github.com/segmentio/analytics-go/v3 v3.2.1 // indirect
+ github.com/segmentio/backo-go v1.0.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/swaggo/swag v1.8.12 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
@@ -82,5 +84,6 @@ require (
golang.org/x/text v0.9.0 // indirect
golang.org/x/tools v0.7.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
+ gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
diff --git a/mdz/go.sum b/mdz/go.sum
index 91cde3a..63fefe9 100644
--- a/mdz/go.sum
+++ b/mdz/go.sum
@@ -155,6 +155,10 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/segmentio/analytics-go/v3 v3.2.1 h1:G+f90zxtc1p9G+WigVyTR0xNfOghOGs/PYAlljLOyeg=
+github.com/segmentio/analytics-go/v3 v3.2.1/go.mod h1:p8owAF8X+5o27jmvUognuXxdtqvSGtD0ZrfY2kcS9bE=
+github.com/segmentio/backo-go v1.0.0 h1:kbOAtGJY2DqOR0jfRkYEorx/b18RgtepGtY3+Cpe6qA=
+github.com/segmentio/backo-go v1.0.0/go.mod h1:kJ9mm9YmoWSkk+oQ+5Cj8DEoRCX2JT6As4kEtIIOp1M=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
@@ -271,6 +275,8 @@ gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
+gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
+gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
diff --git a/mdz/pkg/cmd/deploy.go b/mdz/pkg/cmd/deploy.go
index 443ec5f..6b8cfb3 100644
--- a/mdz/pkg/cmd/deploy.go
+++ b/mdz/pkg/cmd/deploy.go
@@ -9,6 +9,8 @@ import (
petname "github.com/dustinkirkland/golang-petname"
"github.com/spf13/cobra"
"github.com/tensorchord/openmodelz/agent/api/types"
+ "github.com/tensorchord/openmodelz/agent/client"
+ "github.com/tensorchord/openmodelz/mdz/pkg/telemetry"
)
var (
@@ -21,6 +23,8 @@ var (
deployGPU int
deployNodeLabel []string
deployDetach bool
+ deployCommand string
+ deployProbePath string
)
// deployCmd represents the deploy command
@@ -55,9 +59,11 @@ func init() {
deployCmd.Flags().StringVar(&deployName, "name", "", "Name of inference")
deployCmd.Flags().StringSliceVarP(&deployNodeLabel, "node-labels", "l", []string{}, "Node labels")
deployCmd.Flags().BoolVar(&deployDetach, "detach", false, "If set, the command returns immediately without waiting for the deployment to complete")
+ deployCmd.Flags().StringVar(&deployCommand, "command", "", "Command to run")
+ deployCmd.Flags().StringVar(&deployProbePath, "probe-path", "", "HTTP Health probe path")
}
-func waitForDeploymentReady(cmd *cobra.Command, client *Client, namespace, name string, interval time.Duration, timeoutSeconds int) error {
+func waitForDeploymentReady(cmd *cobra.Command, client *client.Client, namespace, name string, interval time.Duration, timeoutSeconds int) error {
timeout := time.After(time.Duration(timeoutSeconds) * time.Second)
tick := time.Tick(interval)
@@ -115,6 +121,13 @@ func commandDeploy(cmd *cobra.Command, args []string) error {
},
}
+ if deployCommand != "" {
+ inf.Spec.Command = &deployCommand
+ }
+ if deployProbePath != "" {
+ inf.Spec.HTTPProbePath = &deployProbePath
+ }
+
if len(deployNodeLabel) > 0 {
inf.Spec.Constraints = []string{}
for _, label := range deployNodeLabel {
@@ -132,6 +145,12 @@ func commandDeploy(cmd *cobra.Command, args []string) error {
}
}
+ telemetry.GetTelemetry().Record(
+ "deploy",
+ telemetry.AddField("GPU", deployGPU),
+ telemetry.AddField("FromZero", deployMinReplicas == 0),
+ )
+
if _, err := agentClient.InferenceCreate(
cmd.Context(), namespace, inf); err != nil {
cmd.PrintErrf("Failed to create the inference: %s\n", errors.Cause(err))
diff --git a/mdz/pkg/cmd/list.go b/mdz/pkg/cmd/list.go
index 60573f1..cd2d2c8 100644
--- a/mdz/pkg/cmd/list.go
+++ b/mdz/pkg/cmd/list.go
@@ -8,6 +8,7 @@ import (
"github.com/jedib0t/go-pretty/v6/table"
"github.com/spf13/cobra"
"github.com/tensorchord/openmodelz/agent/api/types"
+ "github.com/tensorchord/openmodelz/mdz/pkg/telemetry"
)
const (
@@ -48,6 +49,7 @@ func init() {
}
func commandList(cmd *cobra.Command, args []string) error {
+ telemetry.GetTelemetry().Record("list")
infs, err := agentClient.InferenceList(cmd.Context(), namespace)
if err != nil {
cmd.PrintErrf("Failed to list inferences: %v\n", err)
diff --git a/mdz/pkg/cmd/logs.go b/mdz/pkg/cmd/logs.go
index 53bcdd2..d0a3e30 100644
--- a/mdz/pkg/cmd/logs.go
+++ b/mdz/pkg/cmd/logs.go
@@ -9,6 +9,7 @@ var (
tail int
since string
end string
+ follow bool
)
// logCmd represents the log command
@@ -36,15 +37,16 @@ func init() {
logsCmd.Flags().IntVarP(&tail, "tail", "t", 0, "Number of lines to show from the end of the logs")
logsCmd.Flags().StringVarP(&since, "since", "s", "2006-01-02T15:04:05Z", "Show logs since timestamp (e.g. 2013-01-02T13:23:37Z) or relative (e.g. 42m for 42 minutes)")
logsCmd.Flags().StringVarP(&end, "end", "e", "", "Only return logs before this timestamp (e.g. 2013-01-02T13:23:37Z) or relative (e.g. 42m for 42 minutes)")
+ logsCmd.Flags().BoolVarP(&follow, "follow", "f", false, "Follow log output")
}
func commandLogs(cmd *cobra.Command, args []string) error {
- logs, err := agentClient.DeploymentLogGet(cmd.Context(), namespace, args[0], since, tail, end)
+ logStream, err := agentClient.DeploymentLogGet(cmd.Context(), namespace, args[0], since, tail, end, follow)
if err != nil {
cmd.PrintErrf("Failed to get logs: %s\n", err)
return err
}
- for _, log := range logs {
+ for log := range logStream {
cmd.Printf("%s: %s\n", log.Instance, log.Text)
}
return nil
diff --git a/mdz/pkg/cmd/root.go b/mdz/pkg/cmd/root.go
index 6c4f46b..82d02bb 100644
--- a/mdz/pkg/cmd/root.go
+++ b/mdz/pkg/cmd/root.go
@@ -9,13 +9,15 @@ import (
"github.com/spf13/cobra/doc"
"github.com/tensorchord/openmodelz/agent/client"
+ "github.com/tensorchord/openmodelz/mdz/pkg/telemetry"
)
var (
// Used for flags.
- mdzURL string
- namespace string
- debug bool
+ mdzURL string
+ namespace string
+ debug bool
+ disableTelemetry bool
agentClient *client.Client
)
@@ -62,6 +64,8 @@ func init() {
rootCmd.PersistentFlags().BoolVarP(&debug, "debug", "", false, "Enable debug logging")
+ rootCmd.PersistentFlags().BoolVarP(&disableTelemetry, "disable-telemetry", "", false, "Disable anonymous telemetry")
+
// Cobra also supports local flags, which will only run
// when this action is called directly.
rootCmd.AddGroup(&cobra.Group{ID: "basic", Title: "Basic Commands:"})
@@ -76,8 +80,8 @@ func commandInit(cmd *cobra.Command, args []string) error {
if agentClient == nil {
if mdzURL == "" {
- // Checkout environment variable MDZ_AGENT.
- mdzURL = os.Getenv("MDZ_AGENT")
+ // Checkout environment variable MDZ_URL.
+ mdzURL = os.Getenv("MDZ_URL")
}
if mdzURL == "" {
mdzURL = "http://localhost:80"
@@ -89,6 +93,10 @@ func commandInit(cmd *cobra.Command, args []string) error {
return err
}
}
+
+ if err := telemetry.Initialize(!disableTelemetry); err != nil {
+ logrus.WithError(err).Debug("Failed to initialize telemetry")
+ }
return nil
}
diff --git a/mdz/pkg/cmd/scale.go b/mdz/pkg/cmd/scale.go
index 5d97b73..eb63700 100644
--- a/mdz/pkg/cmd/scale.go
+++ b/mdz/pkg/cmd/scale.go
@@ -2,6 +2,7 @@ package cmd
import (
"github.com/spf13/cobra"
+ "github.com/tensorchord/openmodelz/mdz/pkg/telemetry"
)
var (
@@ -73,6 +74,8 @@ func commandScale(cmd *cobra.Command, args []string) error {
deployment.Spec.Scaling.TargetLoad = int32Ptr(targetInflightRequests)
}
+ telemetry.GetTelemetry().Record("scale")
+
if _, err := agentClient.DeploymentUpdate(cmd.Context(), namespace, deployment); err != nil {
cmd.PrintErrf("Failed to update deployment: %s\n", err)
return err
diff --git a/mdz/pkg/cmd/server.go b/mdz/pkg/cmd/server.go
index 1ef51e3..bf078c5 100644
--- a/mdz/pkg/cmd/server.go
+++ b/mdz/pkg/cmd/server.go
@@ -7,8 +7,10 @@ import (
)
var (
- serverVerbose bool
- serverPollingInterval time.Duration
+ serverVerbose bool
+ serverPollingInterval time.Duration = 3 * time.Second
+ serverRegistryMirrorName string
+ serverRegistryMirrorEndpoints []string
)
// serverCmd represents the server command
@@ -29,7 +31,6 @@ func init() {
// Cobra supports Persistent Flags which will work for this command
// and all subcommands, e.g.:
serverCmd.PersistentFlags().BoolVarP(&serverVerbose, "verbose", "v", false, "Verbose output")
- serverCmd.PersistentFlags().DurationVarP(&serverPollingInterval, "polling-interval", "p", 3*time.Second, "Polling interval")
// Cobra supports local flags which will only run when this command
// is called directly, e.g.:
diff --git a/mdz/pkg/cmd/server_destroy.go b/mdz/pkg/cmd/server_destroy.go
new file mode 100644
index 0000000..36792c9
--- /dev/null
+++ b/mdz/pkg/cmd/server_destroy.go
@@ -0,0 +1,50 @@
+package cmd
+
+import (
+ "github.com/cockroachdb/errors"
+ "github.com/spf13/cobra"
+
+ "github.com/tensorchord/openmodelz/mdz/pkg/server"
+)
+
+// serverDestroyCmd represents the server destroy command
+var serverDestroyCmd = &cobra.Command{
+ Use: "destroy",
+ Short: "Destroy the cluster",
+ Long: `Destroy the cluster`,
+ Example: ` mdz server destroy`,
+ PreRunE: commandInitLog,
+ RunE: commandServerDestroy,
+}
+
+func init() {
+ serverCmd.AddCommand(serverDestroyCmd)
+
+ // Here you will define your flags and configuration settings.
+
+ // Cobra supports Persistent Flags which will work for this command
+ // and all subcommands, e.g.:
+
+ // Cobra supports local flags which will only run when this command
+ // is called directly, e.g.:
+}
+
+func commandServerDestroy(cmd *cobra.Command, args []string) error {
+ engine, err := server.NewDestroy(server.Options{
+ Verbose: serverVerbose,
+ OutputStream: cmd.ErrOrStderr(),
+ RetryInternal: serverPollingInterval,
+ })
+ if err != nil {
+ cmd.PrintErrf("Failed to destroy the server: %s\n", errors.Cause(err))
+ return err
+ }
+
+ _, err = engine.Run()
+ if err != nil {
+ cmd.PrintErrf("Failed to destroy the server: %s\n", errors.Cause(err))
+ return err
+ }
+ cmd.Printf("✅ Server destroyed\n")
+ return nil
+}
diff --git a/mdz/pkg/cmd/server_join.go b/mdz/pkg/cmd/server_join.go
index f5c2316..f996a24 100644
--- a/mdz/pkg/cmd/server_join.go
+++ b/mdz/pkg/cmd/server_join.go
@@ -5,6 +5,7 @@ import (
"github.com/spf13/cobra"
"github.com/tensorchord/openmodelz/mdz/pkg/server"
+ "github.com/tensorchord/openmodelz/mdz/pkg/telemetry"
)
// serverJoinCmd represents the server join command
@@ -28,6 +29,10 @@ func init() {
// Cobra supports local flags which will only run when this command
// is called directly, e.g.:
+ serverJoinCmd.Flags().StringVarP(&serverRegistryMirrorName, "mirror-name", "",
+ "docker.io", "Mirror domain name of the registry")
+ serverJoinCmd.Flags().StringArrayVarP(&serverRegistryMirrorEndpoints, "mirror-endpoints", "",
+ []string{}, "Mirror URL endpoints of the registry like `https://quay.io`")
}
func commandServerJoin(cmd *cobra.Command, args []string) error {
@@ -36,12 +41,18 @@ func commandServerJoin(cmd *cobra.Command, args []string) error {
OutputStream: cmd.ErrOrStderr(),
RetryInternal: serverPollingInterval,
ServerIP: args[0],
+ Mirror: server.Mirror{
+ Name: serverRegistryMirrorName,
+ Endpoints: serverRegistryMirrorEndpoints,
+ },
})
if err != nil {
- cmd.PrintErrf("Failed to join the cluster: %s\n", errors.Cause(err))
+ cmd.PrintErrf("Failed to configure before join: %s\n", errors.Cause(err))
return err
}
+ telemetry.GetTelemetry().Record("server join")
+
_, err = engine.Run()
if err != nil {
cmd.PrintErrf("Failed to join the cluster: %s\n", errors.Cause(err))
diff --git a/mdz/pkg/cmd/server_list.go b/mdz/pkg/cmd/server_list.go
index 6bb0f1d..7f05c50 100644
--- a/mdz/pkg/cmd/server_list.go
+++ b/mdz/pkg/cmd/server_list.go
@@ -2,11 +2,16 @@ package cmd
import (
"fmt"
+ "math"
"github.com/cockroachdb/errors"
"github.com/jedib0t/go-pretty/v6/table"
+ "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
+ "k8s.io/apimachinery/pkg/api/resource"
+
"github.com/tensorchord/openmodelz/agent/api/types"
+ "github.com/tensorchord/openmodelz/mdz/pkg/telemetry"
)
var (
@@ -40,6 +45,7 @@ func init() {
}
func commandServerList(cmd *cobra.Command, args []string) error {
+ telemetry.GetTelemetry().Record("server list")
servers, err := agentClient.ServerList(cmd.Context())
if err != nil {
cmd.PrintErrf("Failed to list servers: %s\n", errors.Cause(err))
@@ -100,13 +106,41 @@ func labelsString(labels map[string]string) string {
for k, v := range labels {
res += fmt.Sprintf("%s=%s\n", k, v)
}
+ if len(res) == 0 {
+ return res
+ }
return res[:len(res)-1]
}
+func prettyByteSize(quantity string) (string, error) {
+ r, err := resource.ParseQuantity(quantity)
+ if err != nil {
+ return "", err
+ }
+ bf := float64(r.Value())
+ for _, unit := range []string{"", "Ki", "Mi", "Gi", "Ti"} {
+ if math.Abs(bf) < 1024.0 {
+ return fmt.Sprintf("%3.1f%sB", bf, unit), nil
+ }
+ bf /= 1024.0
+ }
+ return fmt.Sprintf("%.1fPiB", bf), nil
+}
+
func resourceListString(l types.ResourceList) string {
- res := fmt.Sprintf("cpu: %s\nmem: %s", l["cpu"], l["memory"])
- if l["nvidia.com/gpu"] != "" {
- res += fmt.Sprintf("\ngpu: %s", l["nvidia.com/gpu"])
+ res := fmt.Sprintf("cpu: %s", l[types.ResourceCPU])
+ memory, ok := l[types.ResourceMemory]
+ if ok {
+ prettyMem, err := prettyByteSize(string(memory))
+ if err != nil {
+ logrus.Infof("failed to parse the memory quantity: %s", memory)
+ } else {
+ memory = types.Quantity(prettyMem)
+ }
+ }
+ res += fmt.Sprintf("\nmemory: %s", memory)
+ if l[types.ResourceGPU] != "" {
+ res += fmt.Sprintf("\ngpu: %s", l[types.ResourceGPU])
}
return res
}
diff --git a/mdz/pkg/cmd/server_start.go b/mdz/pkg/cmd/server_start.go
index 4332f65..468100f 100644
--- a/mdz/pkg/cmd/server_start.go
+++ b/mdz/pkg/cmd/server_start.go
@@ -9,11 +9,15 @@ import (
"github.com/tensorchord/openmodelz/agent/pkg/consts"
"github.com/tensorchord/openmodelz/mdz/pkg/server"
+ "github.com/tensorchord/openmodelz/mdz/pkg/telemetry"
+ "github.com/tensorchord/openmodelz/mdz/pkg/version"
)
var (
serverStartRuntime string
serverStartDomain string = consts.Domain
+ serverStartVersion string
+ serverStartWithGPU bool
)
// serverStartCmd represents the server start command
@@ -40,6 +44,15 @@ func init() {
// Cobra supports local flags which will only run when this command
// is called directly, e.g.:
// serverStartCmd.Flags().StringVarP(&serverStartRuntime, "runtime", "r", "k3s", "Runtime to use (k3s, docker) in the started server")
+ serverStartCmd.Flags().StringVarP(&serverStartVersion, "version", "",
+ version.HelmChartVersion, "Version of the server to start")
+ serverStartCmd.Flags().MarkHidden("version")
+ serverStartCmd.Flags().BoolVarP(&serverStartWithGPU, "force-gpu", "g",
+ false, "Start the server with GPU support (ignore the GPU detection)")
+ serverStartCmd.Flags().StringVarP(&serverRegistryMirrorName, "mirror-name", "",
+ "docker.io", "Mirror domain name of the registry")
+ serverStartCmd.Flags().StringArrayVarP(&serverRegistryMirrorEndpoints, "mirror-endpoints", "",
+ []string{}, "Mirror URL endpoints of the registry like `https://quay.io`")
}
func commandServerStart(cmd *cobra.Command, args []string) error {
@@ -48,12 +61,24 @@ func commandServerStart(cmd *cobra.Command, args []string) error {
domainWithSuffix := fmt.Sprintf("%s.%s", args[0], serverStartDomain)
domain = &domainWithSuffix
}
+ defer func(start time.Time) {
+ telemetry.GetTelemetry().Record(
+ "server start",
+ telemetry.AddField("duration", time.Since(start).Seconds()),
+ )
+ }(time.Now())
engine, err := server.NewStart(server.Options{
Verbose: serverVerbose,
Runtime: server.Runtime(serverStartRuntime),
OutputStream: cmd.ErrOrStderr(),
RetryInternal: serverPollingInterval,
Domain: domain,
+ Version: serverStartVersion,
+ ForceGPU: serverStartWithGPU,
+ Mirror: server.Mirror{
+ Name: serverRegistryMirrorName,
+ Endpoints: serverRegistryMirrorEndpoints,
+ },
})
if err != nil {
cmd.PrintErrf("Failed to start the server: %s\n", errors.Cause(err))
@@ -83,6 +108,6 @@ func commandServerStart(cmd *cobra.Command, args []string) error {
}
cmd.Printf("🐳 The server is running at %s\n", mdzURL)
cmd.Printf("🎉 You could set the environment variable to get started!\n\n")
- cmd.Printf("export MDZ_AGENT=%s\n", mdzURL)
+ cmd.Printf("export MDZ_URL=%s\n", mdzURL)
return nil
}
diff --git a/mdz/pkg/cmd/version.go b/mdz/pkg/cmd/version.go
index 72df358..f6650a2 100644
--- a/mdz/pkg/cmd/version.go
+++ b/mdz/pkg/cmd/version.go
@@ -53,8 +53,6 @@ func printServerVersion(cmd *cobra.Command) error {
}
cmd.Println("Server:")
- cmd.Printf(" Name: \t\t%s\n", info.Name)
- cmd.Printf(" Orchestration: %s\n", info.Orchestration)
cmd.Printf(" Version: \t%s\n", info.Version.Version)
cmd.Printf(" Build Date: \t%s\n", info.Version.BuildDate)
cmd.Printf(" Git Commit: \t%s\n", info.Version.GitCommit)
diff --git a/mdz/pkg/server/agentd_run.go b/mdz/pkg/server/agentd_run.go
index 2bef043..9c16080 100644
--- a/mdz/pkg/server/agentd_run.go
+++ b/mdz/pkg/server/agentd_run.go
@@ -11,7 +11,7 @@ type agentDRunStep struct {
}
// TODO(gaocegege): There is still a bug, thus it cannot be used actually.
-// The process wil exit after the command returns. We need to put it in systemd.
+// The process will exit after the command returns. We need to put it in systemd.
func (s *agentDRunStep) Run() error {
fmt.Fprintf(s.options.OutputStream, "🚧 Running the agent for docker runtime...\n")
cmd := exec.Command("/bin/sh", "-c", "mdz local-agent &")
diff --git a/mdz/pkg/server/engine.go b/mdz/pkg/server/engine.go
index ace9d7e..f92ccec 100644
--- a/mdz/pkg/server/engine.go
+++ b/mdz/pkg/server/engine.go
@@ -14,9 +14,21 @@ type Options struct {
Verbose bool
OutputStream io.Writer
Runtime Runtime
+ Mirror Mirror
RetryInternal time.Duration
ServerIP string
Domain *string
+ Version string
+ ForceGPU bool
+}
+
+type Mirror struct {
+ Name string
+ Endpoints []string
+}
+
+func (m *Mirror) Configured() bool {
+ return m.Name != "" && len(m.Endpoints) > 0
}
type Runtime string
@@ -36,6 +48,9 @@ type Result struct {
}
func NewStart(o Options) (*Engine, error) {
+ if o.Verbose {
+ fmt.Fprintf(o.OutputStream, "Starting the server with config: %+v\n", o)
+ }
var engine *Engine
switch o.Runtime {
case RuntimeDocker:
@@ -52,6 +67,9 @@ func NewStart(o Options) (*Engine, error) {
options: o,
Steps: []Step{
// Install k3s and related tools.
+ &k3sPrepare{
+ options: o,
+ },
&k3sInstallStep{
options: o,
},
@@ -82,11 +100,22 @@ func NewStop(o Options) (*Engine, error) {
}, nil
}
+func NewDestroy(o Options) (*Engine, error) {
+ return &Engine{
+ options: o,
+ Steps: []Step{
+ // Destroy all k3s and related tools.
+ &k3sDestroyAllStep{
+ options: o,
+ },
+ },
+ }, nil
+}
+
func NewJoin(o Options) (*Engine, error) {
return &Engine{
options: o,
Steps: []Step{
- // Kill all k3s and related tools.
&k3sJoinStep{
options: o,
},
@@ -118,6 +147,12 @@ func (e *Engine) Run() (*Result, error) {
MDZURL: fmt.Sprintf("http://%s", *e.options.Domain),
}, nil
}
+ // Get the server IP.
+ if resultDomain != "" {
+ return &Result{
+ MDZURL: fmt.Sprintf("http://%s", resultDomain),
+ }, nil
+ }
return &Result{
MDZURL: fmt.Sprintf("http://0.0.0.0:%d", AgentPort),
}, nil
diff --git a/mdz/pkg/server/gpu_install.go b/mdz/pkg/server/gpu_install.go
index 8580c79..a3f2786 100644
--- a/mdz/pkg/server/gpu_install.go
+++ b/mdz/pkg/server/gpu_install.go
@@ -4,7 +4,10 @@ import (
_ "embed"
"fmt"
"io"
+ "os"
"os/exec"
+ "path/filepath"
+ "regexp"
"syscall"
)
@@ -16,7 +19,44 @@ type gpuInstallStep struct {
options Options
}
+// check if the Nvidia Toolkit is installed on the host
+func (s *gpuInstallStep) hasNvidiaToolkit() bool {
+ locations := []string{
+ "/usr/local/nvidia/toolkit",
+ "/usr/bin",
+ }
+ binaryNames := []string{
+ "nvidia-container-runtime",
+ "nvidia-container-runtime-experimental",
+ }
+ for _, location := range locations {
+ for _, name := range binaryNames {
+ path := filepath.Join(location, name)
+ if _, err := os.Stat(path); err == nil {
+ return true
+ }
+ }
+ }
+ return false
+}
+
+func (s *gpuInstallStep) hasNvidiaDevice() bool {
+ output, err := exec.Command("/bin/sh", "-c", "lspci").Output()
+ if err != nil {
+ return false
+ }
+ regexNvidia := regexp.MustCompile("(?i)nvidia")
+ return regexNvidia.Match(output)
+}
+
func (s *gpuInstallStep) Run() error {
+ if !s.options.ForceGPU {
+ // detect GPU
+ if !(s.hasNvidiaDevice() || s.hasNvidiaToolkit()) {
+ fmt.Fprintf(s.options.OutputStream, "🚧 Nvidia Toolkit is missing, skip the GPU initialization.\n")
+ return nil
+ }
+ }
fmt.Fprintf(s.options.OutputStream, "🚧 Initializing the GPU resource...\n")
cmd := exec.Command("/bin/sh", "-c", "sudo k3s kubectl apply -f -")
@@ -41,6 +81,8 @@ func (s *gpuInstallStep) Run() error {
if _, err := io.WriteString(stdin, gpuYamlContent); err != nil {
return err
}
+ // Close the input stream to finish the pipe. Then the command will use the
+ // input from the pipe to start the next process.
stdin.Close()
if err := cmd.Wait(); err != nil {
diff --git a/mdz/pkg/server/k3s_destroy.go b/mdz/pkg/server/k3s_destroy.go
new file mode 100644
index 0000000..38158b3
--- /dev/null
+++ b/mdz/pkg/server/k3s_destroy.go
@@ -0,0 +1,38 @@
+package server
+
+import (
+ "fmt"
+ "os/exec"
+ "syscall"
+)
+
+// k3sDestroyAllStep uninstalls k3s and related tools, destroying the cluster.
+type k3sDestroyAllStep struct {
+ options Options
+}
+
+func (s *k3sDestroyAllStep) Run() error {
+ fmt.Fprintf(s.options.OutputStream, "🚧 Destroy the OpenModelz Cluster...\n")
+ // TODO(gaocegege): Embed the script into the binary.
+ cmd := exec.Command("/bin/sh", "-c", "/usr/local/bin/k3s-uninstall.sh")
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Pdeathsig: syscall.SIGKILL,
+ }
+ if s.options.Verbose {
+ cmd.Stderr = s.options.OutputStream
+ cmd.Stdout = s.options.OutputStream
+ } else {
+ cmd.Stdout = nil
+ cmd.Stderr = nil
+ }
+ err := cmd.Run()
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (s *k3sDestroyAllStep) Verify() error {
+ return nil
+}
diff --git a/mdz/pkg/server/k3s_install.go b/mdz/pkg/server/k3s_install.go
index 0144b2e..6a823ec 100644
--- a/mdz/pkg/server/k3s_install.go
+++ b/mdz/pkg/server/k3s_install.go
@@ -57,6 +57,8 @@ func (s *k3sInstallStep) Run() error {
if _, err := io.WriteString(stdin, bashContent); err != nil {
return err
}
+ // Close the input stream to finish the pipe. Then the command will use the
+ // input from the pipe to start the next process.
stdin.Close()
fmt.Fprintf(s.options.OutputStream, "🚧 Waiting for the server to be created...\n")
diff --git a/mdz/pkg/server/k3s_join.go b/mdz/pkg/server/k3s_join.go
index a5c79ae..253365f 100644
--- a/mdz/pkg/server/k3s_join.go
+++ b/mdz/pkg/server/k3s_join.go
@@ -49,6 +49,8 @@ func (s *k3sJoinStep) Run() error {
if _, err := io.WriteString(stdin, bashContent); err != nil {
return err
}
+ // Close the input stream to finish the pipe. Then the command will use the
+ // input from the pipe to start the next process.
stdin.Close()
fmt.Fprintf(s.options.OutputStream, "🚧 Waiting for the server to be ready...\n")
diff --git a/mdz/pkg/server/k3s_killall.go b/mdz/pkg/server/k3s_killall.go
index 43f2614..b797d31 100644
--- a/mdz/pkg/server/k3s_killall.go
+++ b/mdz/pkg/server/k3s_killall.go
@@ -12,7 +12,7 @@ type k3sKillAllStep struct {
}
func (s *k3sKillAllStep) Run() error {
- fmt.Fprintf(s.options.OutputStream, "🚧 Stopping all the processes...\n")
+ fmt.Fprintf(s.options.OutputStream, "🚧 Stopping the OpenModelz Cluster...\n")
// TODO(gaocegege): Embed the script into the binary.
cmd := exec.Command("/bin/sh", "-c", "/usr/local/bin/k3s-killall.sh")
cmd.SysProcAttr = &syscall.SysProcAttr{
diff --git a/mdz/pkg/server/k3s_prepare.go b/mdz/pkg/server/k3s_prepare.go
new file mode 100644
index 0000000..2e3d77d
--- /dev/null
+++ b/mdz/pkg/server/k3s_prepare.go
@@ -0,0 +1,66 @@
+package server
+
+import (
+ _ "embed"
+ "fmt"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "syscall"
+ "text/template"
+)
+
+//go:embed registries.yaml
+var registriesContent string
+
+const mirrorPath = "/etc/rancher/k3s"
+const mirrorFile = "registries.yaml"
+
+// k3sPrepare installs everything required by k3s.
+type k3sPrepare struct {
+ options Options
+}
+
+func (s *k3sPrepare) Run() error {
+ if !s.options.Mirror.Configured() {
+ return nil
+ }
+ fmt.Fprintf(s.options.OutputStream, "🚧 Configure the mirror...\n")
+
+ tmpl, err := template.New("registries").Parse(registriesContent)
+ if err != nil {
+ panic(err)
+ }
+ buf := strings.Builder{}
+ err = tmpl.Execute(&buf, s.options.Mirror)
+ if err != nil {
+ panic(err)
+ }
+
+ cmd := exec.Command("/bin/sh", "-c", fmt.Sprintf(
+ "sudo mkdir -p %s && sudo tee %s > /dev/null << EOF\n%s\nEOF",
+ mirrorPath,
+ filepath.Join(mirrorPath, mirrorFile),
+ buf.String(),
+ ))
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Pdeathsig: syscall.SIGKILL,
+ }
+ if s.options.Verbose {
+ cmd.Stderr = s.options.OutputStream
+ cmd.Stdout = s.options.OutputStream
+ } else {
+ cmd.Stdout = nil
+ cmd.Stderr = nil
+ }
+ err = cmd.Run()
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (s *k3sPrepare) Verify() error {
+ return nil
+}
diff --git a/mdz/pkg/server/nginx_install.go b/mdz/pkg/server/nginx_install.go
index 5841749..651b4e4 100644
--- a/mdz/pkg/server/nginx_install.go
+++ b/mdz/pkg/server/nginx_install.go
@@ -42,6 +42,8 @@ func (s *nginxInstallStep) Run() error {
if _, err := io.WriteString(stdin, nginxYamlContent); err != nil {
return err
}
+ // Close the input stream to finish the pipe. Then the command will use the
+ // input from the pipe to start the next process.
stdin.Close()
if err := cmd.Wait(); err != nil {
diff --git a/mdz/pkg/server/openmodelz.yaml b/mdz/pkg/server/openmodelz.yaml
index 32135f8..94e148c 100644
--- a/mdz/pkg/server/openmodelz.yaml
+++ b/mdz/pkg/server/openmodelz.yaml
@@ -14,7 +14,7 @@ spec:
chart: openmodelz
repo: https://tensorchord.github.io/openmodelz-charts
targetNamespace: openmodelz
- version: 0.0.8
+ version: {{.Version}}
set:
valuesContent: |-
fullnameOverride: openmodelz
diff --git a/mdz/pkg/server/openmodelz_install.go b/mdz/pkg/server/openmodelz_install.go
index 97030ca..f5a4e0a 100644
--- a/mdz/pkg/server/openmodelz_install.go
+++ b/mdz/pkg/server/openmodelz_install.go
@@ -6,6 +6,7 @@ import (
"html/template"
"io"
"os/exec"
+ "regexp"
"strings"
"syscall"
@@ -15,6 +16,8 @@ import (
//go:embed openmodelz.yaml
var yamlContent string
+var resultDomain string
+
// openModelZInstallStep installs the OpenModelZ deployments.
type openModelZInstallStep struct {
options Options
@@ -47,7 +50,10 @@ func (s *openModelZInstallStep) Run() error {
variables := struct {
Domain string
IpToDomain bool
- }{}
+ Version string
+ }{
+ Version: s.options.Version,
+ }
if s.options.Domain != nil {
variables.Domain = *s.options.Domain
variables.IpToDomain = false
@@ -66,9 +72,14 @@ func (s *openModelZInstallStep) Run() error {
panic(err)
}
+ logrus.WithField("variables", variables).
+ Debugf("Deploying OpenModelZ with the following variables")
+
if _, err := io.WriteString(stdin, buf.String()); err != nil {
return err
}
+ // Close the input stream to finish the pipe. Then the command will use the
+ // input from the pipe to start the next process.
stdin.Close()
fmt.Fprintf(s.options.OutputStream, "🚧 Waiting for the server to be ready...\n")
@@ -90,8 +101,17 @@ func (s *openModelZInstallStep) Verify() error {
return err
}
logrus.Debugf("kubectl get cmd output: %s\n", output)
- if len(output) == 0 {
+ if len(output) <= 4 {
return fmt.Errorf("cannot get the ingress ip: output is empty")
}
+
+ // Get the IP from output like this: `[{"ip":"192.168.71.93"}]`
+ re := regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}`)
+ found := re.MatchString(string(output))
+ if !found {
+ return fmt.Errorf("cannot get the ingress ip")
+ }
+
+ resultDomain = re.FindString(string(output))
return nil
}
diff --git a/mdz/pkg/server/registries.yaml b/mdz/pkg/server/registries.yaml
new file mode 100644
index 0000000..e073e1d
--- /dev/null
+++ b/mdz/pkg/server/registries.yaml
@@ -0,0 +1,4 @@
+mirrors:
+ {{ .Name }}:
+ endpoint:
+ {{ range $endpoint := .Endpoints }}- "{{ $endpoint }}"{{ end }}
diff --git a/mdz/pkg/telemetry/telemetry.go b/mdz/pkg/telemetry/telemetry.go
new file mode 100644
index 0000000..580a9bd
--- /dev/null
+++ b/mdz/pkg/telemetry/telemetry.go
@@ -0,0 +1,166 @@
+package telemetry
+
+import (
+ "io"
+ "os"
+ "path/filepath"
+ "runtime"
+ "sync"
+ "time"
+
+ "github.com/cockroachdb/errors"
+ "github.com/google/uuid"
+ segmentio "github.com/segmentio/analytics-go/v3"
+ "github.com/sirupsen/logrus"
+
+ "github.com/tensorchord/openmodelz/mdz/pkg/version"
+)
+
+type TelemetryField func(*segmentio.Properties)
+
+type Telemetry interface {
+ Record(command string, args ...TelemetryField)
+}
+
+type defaultTelemetry struct {
+ client segmentio.Client
+ uid string
+ enabled bool
+}
+
+const telemetryToken = "65WHA9bxCNX74K3HjgplMOmsio9LkYSI"
+
+var (
+ once sync.Once
+ telemetry *defaultTelemetry
+ telemetryConfigFile string
+)
+
+func init() {
+ home, err := os.UserHomeDir()
+ if err != nil {
+ panic(err)
+ }
+ telemetryConfigFile = filepath.Join(home, ".config", "openmodelz", "telemetry")
+}
+
+func GetTelemetry() Telemetry {
+ return telemetry
+}
+
+func Initialize(enabled bool) error {
+ once.Do(func() {
+ client, err := segmentio.NewWithConfig(telemetryToken, segmentio.Config{
+ BatchSize: 1,
+ })
+ if err != nil {
+ panic(err)
+ }
+ telemetry = &defaultTelemetry{
+ client: client,
+ enabled: enabled,
+ }
+ })
+ return telemetry.init()
+}
+
+func (t *defaultTelemetry) init() error {
+ if !t.enabled {
+ return nil
+ }
+ // detect if the config file already exists
+ _, err := os.Stat(telemetryConfigFile)
+ if err != nil {
+ if !os.IsNotExist(err) {
+ return errors.Wrap(err, "failed to stat telemetry config file")
+ }
+ t.uid = uuid.New().String()
+ return t.dumpConfig()
+ }
+ if err = t.loadConfig(); err != nil {
+ return errors.Wrap(err, "failed to load telemetry config")
+ }
+ t.Idnetify()
+ return nil
+}
+
+func (t *defaultTelemetry) dumpConfig() error {
+ if err := os.MkdirAll(filepath.Dir(telemetryConfigFile), os.ModeDir|0700); err != nil {
+ return errors.Wrap(err, "failed to create telemetry config directory")
+ }
+ file, err := os.Create(telemetryConfigFile)
+ if err != nil {
+ return errors.Wrap(err, "failed to create telemetry config file")
+ }
+ defer file.Close()
+ _, err = file.WriteString(t.uid)
+ if err != nil {
+ return errors.Wrap(err, "failed to write telemetry config file")
+ }
+ return nil
+}
+
+func (t *defaultTelemetry) loadConfig() error {
+ file, err := os.Open(telemetryConfigFile)
+ if err != nil {
+ return errors.Wrap(err, "failed to open telemetry config file")
+ }
+ defer file.Close()
+ uid, err := io.ReadAll(file)
+ if err != nil {
+ return errors.Wrap(err, "failed to read telemetry config file")
+ }
+ t.uid = string(uid)
+ return nil
+}
+
+func (t *defaultTelemetry) Idnetify() {
+ if !t.enabled {
+ return
+ }
+ v := version.GetOpenModelzVersion()
+ if err := t.client.Enqueue(segmentio.Identify{
+ UserId: t.uid,
+ Context: &segmentio.Context{
+ OS: segmentio.OSInfo{
+ Name: runtime.GOOS,
+ Version: runtime.GOARCH,
+ },
+ App: segmentio.AppInfo{
+ Name: "openmodelz",
+ Version: v,
+ },
+ },
+ Timestamp: time.Now(),
+ Traits: segmentio.NewTraits(),
+ }); err != nil {
+ logrus.WithError(err).Debug("failed to identify user")
+ return
+ }
+}
+
+func AddField(name string, value interface{}) TelemetryField {
+ return func(p *segmentio.Properties) {
+ p.Set(name, value)
+ }
+}
+
+func (t *defaultTelemetry) Record(command string, fields ...TelemetryField) {
+ if !t.enabled {
+ return
+ }
+ logrus.WithField("UID", t.uid).WithField("command", command).Debug("send telemetry")
+ track := segmentio.Track{
+ UserId: t.uid,
+ Event: command,
+ Properties: segmentio.NewProperties(),
+ }
+ for _, field := range fields {
+ field(&track.Properties)
+ }
+ if err := t.client.Enqueue(track); err != nil {
+ logrus.WithError(err).Debug("failed to send telemetry")
+ }
+ // make sure the msg can be sent out
+ t.client.Close()
+}
diff --git a/mdz/pkg/version/version.go b/mdz/pkg/version/version.go
index 20deaef..5bd3562 100644
--- a/mdz/pkg/version/version.go
+++ b/mdz/pkg/version/version.go
@@ -30,6 +30,8 @@ var (
// Package is filled at linking time
Package = "github.com/tensorchord/openmodelz/agent"
+ HelmChartVersion = "0.0.13"
+
// Revision is filled with the VCS (e.g. git) revision being used to build
// the program at linking time.
Revision = ""
@@ -42,7 +44,7 @@ var (
developmentFlag = "false"
)
-// Version contains envd version information
+// Version contains OpenModelz version information
type Version struct {
Version string
BuildDate string
@@ -63,8 +65,8 @@ func SetGitTagForE2ETest(tag string) {
gitTag = tag
}
-// GetEnvdVersion gets Envd version information
-func GetEnvdVersion() string {
+// GetOpenModelzVersion gets OpenModelz version information
+func GetOpenModelzVersion() string {
var versionStr string
if gitCommit != "" && gitTag != "" &&
@@ -95,7 +97,7 @@ func GetEnvdVersion() string {
// GetVersion returns the version information
func GetVersion() Version {
return Version{
- Version: GetEnvdVersion(),
+ Version: GetOpenModelzVersion(),
BuildDate: buildDate,
GitCommit: gitCommit,
GitTag: gitTag,
@@ -126,5 +128,5 @@ func UserAgent() string {
version = matches[0][1] + "-dev"
}
- return "envd/" + version
+ return "modelz/" + version
}
diff --git a/modelzetes/LICENSE b/modelzetes/LICENSE
index b2b0490..c820691 100644
--- a/modelzetes/LICENSE
+++ b/modelzetes/LICENSE
@@ -1,5 +1,6 @@
MIT License
+Copyright (c) 2023 TensorChord Inc.
Copyright (c) 2020 OpenFaaS Author(s)
Copyright (c) 2017 Alex Ellis
diff --git a/modelzetes/pkg/app/root.go b/modelzetes/pkg/app/root.go
index bfef40d..e98bf5d 100644
--- a/modelzetes/pkg/app/root.go
+++ b/modelzetes/pkg/app/root.go
@@ -21,7 +21,7 @@ const (
flagDebug = "debug"
// metrics
- flageMetricsServerPort = "metrics-server-port"
+ flagMetricsServerPort = "metrics-server-port"
// kubernetes
flagMasterURL = "master-url"
@@ -73,7 +73,7 @@ func New() App {
EnvVars: []string{"DEBUG"},
},
&cli.IntFlag{
- Name: flageMetricsServerPort,
+ Name: flagMetricsServerPort,
Value: 8081,
Usage: "port to listen on",
EnvVars: []string{"MODELZETES_SERVER_PORT"},
diff --git a/modelzetes/pkg/k8s/proxy.go b/modelzetes/pkg/k8s/proxy.go
index c05b24d..e5662f8 100644
--- a/modelzetes/pkg/k8s/proxy.go
+++ b/modelzetes/pkg/k8s/proxy.go
@@ -109,6 +109,6 @@ func (l *FunctionLookup) verifyNamespace(name string) error {
if name != "kube-system" {
return nil
}
- // ToDo use global namepace parse and validation
+ // ToDo use global namespace parse and validation
return fmt.Errorf("namespace not allowed")
}
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f333999
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,51 @@
+[project]
+name = "openmodelz"
+description = "Simplify machine learning deployment for any environments."
+readme = "README.md"
+authors = [
+ {name = "TensorChord", email = "modelz@tensorchord.ai"},
+]
+license = {text = "Apache-2.0"}
+keywords = ["machine learning", "deep learning", "model serving"]
+dynamic = ["version"]
+requires-python = ">=2.7"
+classifiers = [
+ "Environment :: GPU",
+ "Intended Audience :: Developers",
+ "License :: OSI Approved :: Apache Software License",
+ "Programming Language :: Python :: 2",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+ "Topic :: Software Development :: Libraries :: Python Modules",
+ "Topic :: Software Development :: Build Tools",
+]
+
+[project.urls]
+homepage = "https://modelz.ai/"
+documentation = "https://docs.open.modelz.ai/"
+repository = "https://github.com/tensorchord/openmodelz"
+changelog = "https://github.com/tensorchord/openmodelz/releases"
+
+[tool.cibuildwheel]
+build-frontend = "build"
+archs = ["auto64"]
+skip = "pp*" # skip pypy
+before-all = ""
+environment = { PIP_NO_CLEAN="yes" }
+before-build = "ls -la mdz/bin" # help to debug
+
+[project.optional-dependencies]
+
+[project.scripts]
+
+[build-system]
+requires = ["setuptools>=45", "wheel", "setuptools_scm"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools_scm]
+write_to = "mdz/_version.py"
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..eab70e2
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,64 @@
+import os
+import subprocess
+import shlex
+from wheel.bdist_wheel import bdist_wheel
+from setuptools import setup, find_packages, Extension
+from setuptools.command.build_ext import build_ext
+from setuptools_scm import get_version
+
+
+with open("README.md", "r", encoding="utf-8") as f:
+ readme = f.read()
+
+class bdist_wheel_universal(bdist_wheel):
+ def get_tag(self):
+ *_, plat = super().get_tag()
+ return "py2.py3", "none", plat
+
+
+def build_if_not_exist():
+ if os.path.isfile("mdz/bin/mdz"):
+ return
+ version = get_version()
+ print(f"build mdz from source ({version})")
+ subprocess.call(["make", "mdz"])
+ errno = subprocess.call(shlex.split(
+ f"make build-release GIT_TAG={version}"
+ ), cwd="mdz")
+ assert errno == 0, f"mdz build failed with code {errno}"
+
+
+class ModelzExtension(Extension):
+ """A custom extension to define the OpenModelz extension."""
+
+
+class ModelzBuildExt(build_ext):
+ def build_extension(self, ext: Extension) -> None:
+ if not isinstance(ext, ModelzExtension):
+ return super().build_extension(ext)
+
+ build_if_not_exist()
+
+
+setup(
+ name="openmodelz",
+ use_scm_version=True,
+ description="Simplify machine learning deployment for any environments.",
+ long_description=readme,
+ long_description_content_type="text/markdown",
+ url="https://github.com/tensorchord/openmodelz",
+ license="Apache License 2.0",
+ author="TensorChord",
+ author_email="modelz@tensorchord.ai",
+ packages=find_packages("mdz"),
+ include_package_data=True,
+ data_files=[("bin", ["mdz/bin/mdz"])],
+ zip_safe=False,
+ ext_modules=[
+ ModelzExtension(name="mdz", sources=["mdz/*"]),
+ ],
+ cmdclass=dict(
+ build_ext=ModelzBuildExt,
+ bdist_wheel=bdist_wheel_universal,
+ ),
+)
diff --git a/typos.toml b/typos.toml
new file mode 100644
index 0000000..b4298e3
--- /dev/null
+++ b/typos.toml
@@ -0,0 +1,5 @@
+[files]
+extend-exclude = ["CHANGELOG.md", "go.mod", "go.sum"]
+[default.extend-words]
+requestor = "requestor"
+ba = "ba"