diff --git a/.all-contributorsrc b/.all-contributorsrc index 6c094b5..d653e6a 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -70,6 +70,28 @@ "contributions": [ "ideas" ] + }, + { + "login": "Xuanwo", + "name": "Xuanwo", + "avatar_url": "https://avatars.githubusercontent.com/u/5351546?v=4", + "profile": "https://xuanwo.io/", + "contributions": [ + "content", + "design", + "ideas" + ] + }, + { + "login": "Zheaoli", + "name": "Nadeshiko Manju", + "avatar_url": "https://avatars.githubusercontent.com/u/7054676?v=4", + "profile": "http://manjusaka.itscoder.com/", + "contributions": [ + "bug", + "design", + "ideas" + ] } ], "contributorsPerLine": 7 diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index 684f87e..ce8938b 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -21,6 +21,17 @@ concurrency: cancel-in-progress: true jobs: + typos-check: + name: Spell Check with Typos + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v3 + - name: Check spelling with custom config file + uses: crate-ci/typos@v1.16.2 + with: + config: ./typos.toml + test: name: test strategy: diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index c552033..cb2a31c 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -1,9 +1,8 @@ name: release on: - push: - tags: - - 'v*' + release: + types: [published] pull_request: paths: - '.github/workflows/release.yml' @@ -36,3 +35,49 @@ jobs: args: release --rm-dist env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: upload gobin + uses: actions/upload-artifact@v3 + with: + name: gobin_${{ github.event.release.tag_name }} + retention-days: 1 + path: | + dist/mdz_linux_amd64_v1/mdz + if-no-files-found: error + pypi_publish: + needs: goreleaser + # only trigger on main repo when tag starts with v + if: github.repository == 'tensorchord/openmodelz' && startsWith(github.ref, 'refs/tags/v') + runs-on: ${{ 
matrix.os }} + timeout-minutes: 20 + strategy: + matrix: + os: [ubuntu-20.04] + steps: + - uses: actions/checkout@v3 + - name: Get gobin + uses: actions/download-artifact@v3 + with: + name: gobin_${{ github.event.release.tag_name }} + path: dist/ + - name: Configure linux build environment + if: runner.os == 'Linux' + run: | + mkdir -p mdz/bin + mv dist/mdz mdz/bin/mdz + chmod +x mdz/bin/mdz + - name: Build wheels + uses: pypa/cibuildwheel@v2.14.1 + - name: Build source distribution + if: runner.os == 'Linux' # Only release source under linux to avoid conflict + run: | + python -m pip install wheel setuptools_scm + python setup.py sdist + mv dist/*.tar.gz wheelhouse/ + - name: Upload to PyPI + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + run: | + python -m pip install --upgrade pip + python -m pip install twine + python -m twine upload wheelhouse/* diff --git a/.gitignore b/.gitignore index 849ddff..b40678e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,186 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +_version.txt +_version.py +wheelhouse/ + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +.ruff_cache/ +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Go workspace file +go.work diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..a8abe23 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,15 @@ +prune autoscaler +prune ingress-operator +prune modelzetes +prune .github +include LICENSE +include README.md +include .goreleaser.yaml +include mdz/Makefile mdz/go.mod mdz/go.sum mdz/LICENSE +graft mdz/pkg +graft mdz/cmd +prune mdz/bin +prune mdz/docs +prune mdz/examples +graft agent/pkg +prune agent/bin diff --git a/README.md b/README.md index f34ff43..9cdfb91 100644 --- a/README.md +++ b/README.md @@ -2,19 +2,16 @@ # OpenModelZ -Simplify machine learning deployment for any environment. - +One-click machine learning deployment at scale on any cluster (GCP, AWS, Lambda labs, your home lab, or even a single machine)

discord invitation link trackgit-views +docs all-contributors -

-OpenModelZ (MDZ) provides a simple CLI to deploy and manage your machine learning workloads on any cloud or home lab. - ## Why use OpenModelZ OpenModelZ is the ideal solution for practitioners who want to quickly deploy their machine learning models to a (public or private) endpoint without the hassle of spending excessive time, money, and effort to figure out the entire end-to-end process. @@ -27,28 +24,44 @@ We created OpenModelZ in response to the difficulties of finding a simple, cost- With OpenModelZ, we take care of the underlying technical details for you, and provide a simple and easy-to-use CLI to deploy your models to **any cloud (GCP, AWS, or others), your home lab, or even a single machine**. -You could **start from a single machine and scale it up to a cluster of machines** without any hassle. OpenModelZ lies at the heart of our [ModelZ](https://modelz.ai), which is a serverless inference platform. It's used in production to deploy models for our customers. +You could **start from a single machine and scale it up to a cluster of machines** without any hassle. Besides this, We **provision a separate subdomain for each deployment** without any extra cost and effort, making each deployment easily accessible from the outside. + +OpenModelZ forms the core of our [ModelZ](https://modelz.ai) platform, which is a serverless machine learning inference service. It is utilized in a production environment to provision models for our clients. ## Quick Start 🚀 -Once you've installed the `mdz` you can start deploying models and experimenting with them. +### Install `mdz` -There are only two concepts in `mdz`: +You can install OpenModelZ using the following command: -- **Deployment**: A deployment is a running inference service. You could configure the number of replicas, the port, and the image, and some other parameters. -- **Server**: A server is a machine that could run the deployments. It could be a cloud VM, a PC, or even a Raspberry Pi. 
You could start from a single server and scale it up to a cluster of machines without any hassle. +```text copy +pip install openmodelz +``` + +You could verify the installation by running the following command: + +```text copy +mdz +``` + +Once you've installed the `mdz` you can start deploying models and experimenting with them. ### Bootstrap `mdz` -It's super easy to bootstrap the `mdz` server. You just need to find a server (could be a cloud VM, a home lab, or even a single machine) and run the `mdz server start` command. The `mdz` server will be bootstrapped on the server and you could start deploying your models. +It's super easy to bootstrap the `mdz` server. You just need to find a server (could be a cloud VM, a home lab, or even a single machine) and run the `mdz server start` command. + +> Notice: We may require the root permission to bootstrap the `mdz` server on port 80. ``` $ mdz server start +🚧 Creating the server... +🚧 Initializing the load balancer... +🚧 Initializing the GPU resource... 🚧 Initializing the server... 🚧 Waiting for the server to be ready... 🐋 Checking if the server is running... Agent: - Version: v0.0.5 + Version: v0.0.13 Build Date: 2023-07-19T09:12:55Z Git Commit: 84d0171640453e9272f78a63e621392e93ef6bbb Git State: clean @@ -58,7 +71,7 @@ Agent: 🐳 The server is running at http://192.168.71.93.modelz.live 🎉 You could set the environment variable to get started! -export MDZ_AGENT=http://192.168.71.93.modelz.live +export MDZ_URL=http://192.168.71.93.modelz.live ``` The internal IP address will be used as the default endpoint of your deployments. You could provide the public IP address of your server to the `mdz server start` command to make it accessible from the outside world. @@ -68,36 +81,78 @@ The internal IP address will be used as the default endpoint of your deployments $ mdz server start 1.2.3.4 ``` -### Create your first deployment +You could also specify the registry mirror to speed up the image pulling process. 
Here is an example: + +```bash /--mirror-endpoints/ +$ mdz server start --mirror-endpoints https://docker.mirrors.sjtug.sjtu.edu.cn +``` + +### Create your first UI-based deployment -Once you've bootstrapped the `mdz` server, you can start deploying your first applications. +Once you've bootstrapped the `mdz` server, you can start deploying your first applications. We will use jupyter notebook as an example in this tutorial. You could use any docker image as your deployment. +```text +$ mdz deploy --image jupyter/minimal-notebook:lab-4.0.3 --name jupyter --port 8888 --command "jupyter notebook --ip='*' --NotebookApp.token='' --NotebookApp.password=''" +Inference jupyter is created +$ mdz list + NAME ENDPOINT STATUS INVOCATIONS REPLICAS + jupyter http://jupyter-9pnxdkeb6jsfqkmq.192.168.71.93.modelz.live Ready 488 1/1 + http://192.168.71.93/inference/jupyter.default ``` -$ mdz deploy --image aikain/simplehttpserver:0.1 --name simple-server --port 80 + +You could access the deployment by visiting the endpoint URL. The endpoint will be automatically generated for each deployment with the following format: `-..modelz.live`. + +It is `http://jupyter-9pnxdkeb6jsfqkmq.192.168.71.93.modelz.live` in this case. The endpoint could be accessed from the outside world as well if you've provided the public IP address of your server to the `mdz server start` command. + +![jupyter notebook](./images/jupyter.png) + +### Create your first OpenAI compatible API server + +You could also create API-based deployments. We will use [OpenAI compatible API server with Bloomz 560M](https://github.com/tensorchord/modelz-llm#run-the-self-hosted-api-server) as an example in this tutorial. 
+ +```text +$ mdz deploy --image modelzai/llm-bloomz-560m:23.07.4 --name simple-server Inference simple-server is created $ mdz list - NAME ENDPOINT STATUS INVOCATIONS REPLICAS - simple-server http://simple-server-4k2epq5lynxbaayn.192.168.71.93.modelz.live Ready 2 1/1 - http://192.168.71.93.modelz.live/inference/simple-server.default + NAME ENDPOINT STATUS INVOCATIONS REPLICAS + jupyter http://jupyter-9pnxdkeb6jsfqkmq.192.168.71.93.modelz.live Ready 488 1/1 + http://192.168.71.93/inference/jupyter.default + simple-server http://simple-server-lagn8m9m8648q6kx.192.168.71.93.modelz.live Ready 0 1/1 + http://192.168.71.93/inference/simple-server.default ``` -You could access the deployment by visiting the endpoint URL. It will be `http://simple-server-4k2epq5lynxbaayn.192.168.71.93.modelz.live` in this case. The endpoint could be accessed from the outside world as well if you've provided the public IP address of your server to the `mdz server start` command. +You could use OpenAI python package and the endpoint `http://simple-server-lagn8m9m8648q6kx.192.168.71.93.modelz.live` in this case, to interact with the deployment. + +```python +import openai +openai.api_base="http://simple-server-lagn8m9m8648q6kx.192.168.71.93.modelz.live" +openai.api_key="any" + +# create a chat completion +chat_completion = openai.ChatCompletion.create(model="bloomz", messages=[ + {"role": "user", "content": "Who are you?"}, + {"role": "assistant", "content": "I am a student"}, + {"role": "user", "content": "What do you learn?"}, +], max_tokens=100) +``` ### Scale your deployment You could scale your deployment by using the `mdz scale` command. -```bash +```text /scale/ $ mdz scale simple-server --replicas 3 ``` -The requests will be load balanced between the replicas of your deployment. +The requests will be load balanced between the replicas of your deployment. + +You could also tell the `mdz` to **autoscale your deployment** based on the inflight requests. 
Please check out the [Autoscaling](https://docs.open.modelz.ai/deployment/autoscale) documentation for more details. ### Debug your deployment Sometimes you may want to debug your deployment. You could use the `mdz logs` command to get the logs of your deployment. -```bash +```text /logs/ $ mdz logs simple-server simple-server-6756dd67ff-4bf4g: 10.42.0.1 - - [27/Jul/2023 02:32:16] "GET / HTTP/1.1" 200 - simple-server-6756dd67ff-4bf4g: 10.42.0.1 - - [27/Jul/2023 02:32:16] "GET / HTTP/1.1" 200 - @@ -106,22 +161,23 @@ simple-server-6756dd67ff-4bf4g: 10.42.0.1 - - [27/Jul/2023 02:32:17] "GET / HTTP You could also use the `mdz exec` command to execute a command in the container of your deployment. You do not need to ssh into the server to do that. -``` +```text /exec/ $ mdz exec simple-server ps PID USER TIME COMMAND 1 root 0:00 /usr/bin/dumb-init /bin/sh -c python3 -m http.server 80 7 root 0:00 /bin/sh -c python3 -m http.server 80 8 root 0:00 python3 -m http.server 80 9 root 0:00 ps +``` + +```text /exec/ $ mdz exec simple-server -ti bash -bash-4.4# uname -r -5.19.0-46-generic bash-4.4# ``` Or you could port-forward the deployment to your local machine and debug it locally. -``` +```text /port-forward/ $ mdz port-forward simple-server 7860 Forwarding inference simple-server to local port 7860 ``` @@ -130,28 +186,27 @@ Forwarding inference simple-server to local port 7860 You could add more servers to your cluster by using the `mdz server join` command. The `mdz` server will be bootstrapped on the server and join the cluster automatically. -``` +```text /join/ +$ mdz server join $ mdz server list NAME PHASE ALLOCATABLE CAPACITY node1 Ready cpu: 16 cpu: 16 mem: 32784748Ki mem: 32784748Ki + gpu: 1 gpu: 1 node2 Ready cpu: 16 cpu: 16 mem: 32784748Ki mem: 32784748Ki + gpu: 1 gpu: 1 ``` ### Label your servers You could label your servers to deploy your models to specific servers. 
For example, you could label your servers with `gpu=true` and deploy your models to servers with GPUs. -``` +```text /--node-labels gpu=true,type=nvidia-a100/ $ mdz server label node3 gpu=true type=nvidia-a100 -$ mdz deploy --image aikain/simplehttpserver:0.1 --name simple-server --port 80 --node-labels gpu=true,type=nvidia-a100 +$ mdz deploy ... --node-labels gpu=true,type=nvidia-a100 ``` -## More on documentation 📝 - -See [OpenModelZ documentation](https://docs.open.modelz.ai/). - ## Roadmap 🗂️ Please checkout [ROADMAP](https://docs.open.modelz.ai/community). @@ -173,8 +228,12 @@ We welcome all kinds of contributions from the open-source community, individual Ce Gao
Ce Gao

💻 👀 Jinjing Zhou
Jinjing Zhou

💬 🐛 🤔 Keming
Keming

💻 🎨 🚇 + Nadeshiko Manju
Nadeshiko Manju

🐛 🎨 🤔 Teddy Xinyuan Chen
Teddy Xinyuan Chen

📖 + Xuanwo
Xuanwo

🖋 🎨 🤔 cutecutecat
cutecutecat

🤔 + + xieydd
xieydd

🤔 diff --git a/agent/README.md b/agent/README.md index 2f7d274..c3eeaf6 100644 --- a/agent/README.md +++ b/agent/README.md @@ -1,8 +1,20 @@ +
+ # OpenModelZ Agent -TODO: +
+ +

+discord invitation link +trackgit-views +

+ +## Installation + +``` +pip install openmodelz +``` + +## Architecture -- Make resolver optional -- proxy with use case -- mdz init -- ai lab +Please check out [Architecture](https://docs.open.modelz.ai/architecture) documentation. diff --git a/agent/client/log.go b/agent/client/log.go index e35fc49..13c60fd 100644 --- a/agent/client/log.go +++ b/agent/client/log.go @@ -12,13 +12,16 @@ import ( "net/url" "strings" + "github.com/sirupsen/logrus" "github.com/tensorchord/openmodelz/agent/api/types" ) +const LogBufferSize = 128 + // DeploymentLogGet gets the deployment logs. func (cli *Client) DeploymentLogGet(ctx context.Context, namespace, name string, - since string, tail int, end string) ( - []types.Message, error) { + since string, tail int, end string, follow bool) ( + <-chan types.Message, error) { urlValues := url.Values{} urlValues.Add("namespace", namespace) urlValues.Add("name", name) @@ -35,26 +38,34 @@ func (cli *Client) DeploymentLogGet(ctx context.Context, namespace, name string, urlValues.Add("tail", fmt.Sprintf("%d", tail)) } - resp, err := cli.get(ctx, "/system/logs/inference", urlValues, nil) - defer ensureReaderClosed(resp) + if follow { + urlValues.Add("follow", "true") + } + resp, err := cli.get(ctx, "/system/logs/inference", urlValues, nil) + if err != nil { - return nil, - wrapResponseError(err, resp, "deployment logs", name) + return nil, wrapResponseError(err, resp, "deployment logs", name) } - + + stream := make(chan types.Message, LogBufferSize) var log types.Message - logs := []types.Message{} scanner := bufio.NewScanner(resp.body) - for scanner.Scan() { - err = json.NewDecoder(strings.NewReader(scanner.Text())).Decode(&log) - if err != nil { - return nil, wrapResponseError(err, resp, "deployment logs", name) + go func () { + defer ensureReaderClosed(resp) + defer close(stream) + for scanner.Scan() { + err = json.Unmarshal(scanner.Bytes(), &log) + if err != nil { + logrus.Warnf("failed to decode %s log: %v | %s | [%s]", name, err, 
scanner.Text(), scanner.Err()) + return + // continue + } + stream <- log } - logs = append(logs, log) - } + }() - return logs, err + return stream, err } func (cli *Client) BuildLogGet(ctx context.Context, namespace, name, since string, diff --git a/agent/pkg/app/config.go b/agent/pkg/app/config.go index 19b41f8..73ab589 100644 --- a/agent/pkg/app/config.go +++ b/agent/pkg/app/config.go @@ -11,7 +11,7 @@ func configFromCLI(c *cli.Context) config.Config { // server cfg.Server.Dev = c.Bool(flagDev) - cfg.Server.ServerPort = c.Int(flageServerPort) + cfg.Server.ServerPort = c.Int(flagServerPort) cfg.Server.ReadTimeout = c.Duration(flagServerReadTimeout) cfg.Server.WriteTimeout = c.Duration(flagServerWriteTimeout) diff --git a/agent/pkg/app/root.go b/agent/pkg/app/root.go index e6bd2d8..871a326 100644 --- a/agent/pkg/app/root.go +++ b/agent/pkg/app/root.go @@ -21,7 +21,7 @@ const ( flagDev = "dev" // server - flageServerPort = "server-port" + flagServerPort = "server-port" flagServerReadTimeout = "server-read-timeout" flagServerWriteTimeout = "server-write-timeout" @@ -89,7 +89,7 @@ func New() App { Usage: "enable development mode", }, &cli.IntFlag{ - Name: flageServerPort, + Name: flagServerPort, Value: 8081, Usage: "port to listen on", EnvVars: []string{"MODELZ_AGENT_SERVER_PORT"}, diff --git a/agent/pkg/log/k8s.go b/agent/pkg/log/k8s.go index 0914ef7..417fbb4 100644 --- a/agent/pkg/log/k8s.go +++ b/agent/pkg/log/k8s.go @@ -2,7 +2,6 @@ package log import ( "bufio" - "bytes" "context" "fmt" "io" @@ -63,6 +62,9 @@ func (k *K8sAPIRequestor) Query(ctx context.Context, if err != nil { return nil, errdefs.InvalidParameter(err) } + } else if r.Follow { + // avoid truncate + endTime = time.Now().Add(time.Hour) } else { endTime = time.Now() } @@ -152,7 +154,7 @@ func podLogs(ctx context.Context, i v1.PodInterface, pod, container, opts.SinceSeconds = parseSince(since) } - stream, err := i.GetLogs(pod, opts).Stream(context.TODO()) + stream, err := i.GetLogs(pod, 
opts).Stream(ctx) if err != nil { return err } @@ -160,14 +162,9 @@ func podLogs(ctx context.Context, i v1.PodInterface, pod, container, done := make(chan error) go func() { - reader := bufio.NewReader(stream) - for { - line, err := reader.ReadBytes('\n') - if err != nil { - done <- err - return - } - msg, ts := extractTimestampAndMsg(string(bytes.Trim(line, "\x00"))) + scanner := bufio.NewScanner(stream) + for scanner.Scan() { + msg, ts := extractTimestampAndMsg(scanner.Text()) dst <- types.Message{ Timestamp: ts, Text: msg, @@ -176,13 +173,19 @@ func podLogs(ctx context.Context, i v1.PodInterface, pod, container, Namespace: namespace, } } + if err := scanner.Err(); err != nil { + done <- err + return + } }() select { case <-ctx.Done(): + logrus.Debug("get-log context cancelled") return ctx.Err() case err := <-done: if err != io.EOF { + logrus.Debugf("failed to read from pod log: %v", err) return err } return nil diff --git a/agent/pkg/prom/prometheus_query.go b/agent/pkg/prom/prometheus_query.go index 0019f48..2b293bf 100644 --- a/agent/pkg/prom/prometheus_query.go +++ b/agent/pkg/prom/prometheus_query.go @@ -77,7 +77,7 @@ func (q PrometheusQuery) Fetch(query string) (*VectorQueryResponse, error) { unmarshalErr := json.Unmarshal(bytesOut, &values) if unmarshalErr != nil { - return nil, fmt.Errorf("error unmarshaling result: %s, '%s'", unmarshalErr, string(bytesOut)) + return nil, fmt.Errorf("error unmarshalling result: %s, '%s'", unmarshalErr, string(bytesOut)) } return &values, nil diff --git a/agent/pkg/runtime/util_domain.go b/agent/pkg/runtime/util_domain.go index 3fac331..bc0429a 100644 --- a/agent/pkg/runtime/util_domain.go +++ b/agent/pkg/runtime/util_domain.go @@ -11,7 +11,7 @@ const ( ) const ( - // stdLen is a standard length of uniuri string to achive ~95 bits of entropy. + // stdLen is a standard length of uniuri string to achieve ~95 bits of entropy. 
stdLen = 16 ) diff --git a/agent/pkg/server/handler_inference_logs.go b/agent/pkg/server/handler_inference_logs.go index 5c19035..a0fc630 100644 --- a/agent/pkg/server/handler_inference_logs.go +++ b/agent/pkg/server/handler_inference_logs.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "net/http" + "time" "github.com/cockroachdb/errors" "github.com/gin-gonic/gin" @@ -46,8 +47,13 @@ func (s Server) getLogsFromRequester(c *gin.Context, requester log.Requester) er } _ = cn - ctx, cancelQuery := context.WithTimeout(c.Request.Context(), - s.config.Inference.LogTimeout) + timeout := s.config.Inference.LogTimeout + if req.Follow { + // use a much larger timeout for streaming log + timeout = time.Hour + } + + ctx, cancelQuery := context.WithTimeout(c.Request.Context(), timeout) defer cancelQuery() messages, err := requester.Query(ctx, req) diff --git a/agent/pkg/server/handler_root.go b/agent/pkg/server/handler_root.go new file mode 100644 index 0000000..0fc6039 --- /dev/null +++ b/agent/pkg/server/handler_root.go @@ -0,0 +1,16 @@ +package server + +import ( + "github.com/gin-gonic/gin" + "github.com/tensorchord/openmodelz/agent/pkg/server/static" +) + +func (s *Server) handleRoot(c *gin.Context) error { + lp, err := static.RenderLoadingPage() + if err != nil { + return err + } + + c.Data(200, "text/html; charset=utf-8", lp.Bytes()) + return nil +} diff --git a/agent/pkg/server/server_init_route.go b/agent/pkg/server/server_init_route.go index 4fbd0f9..9ba4ca1 100644 --- a/agent/pkg/server/server_init_route.go +++ b/agent/pkg/server/server_init_route.go @@ -38,6 +38,9 @@ func (s *Server) registerRoutes() { // healthz root.GET(endpointHealthz, WrapHandler(s.handleHealthz)) + // landing page + root.GET("/", WrapHandler(s.handleRoot)) + // control plane controlPlane := root.Group("/system") // inferences diff --git a/agent/pkg/server/static/index.html b/agent/pkg/server/static/index.html new file mode 100644 index 0000000..04d48d7 --- /dev/null +++ 
b/agent/pkg/server/static/index.html @@ -0,0 +1,339 @@ + + + + + + + + OpenModelZ Serving | Running + + + + +
+
+

+ OpenModelZ server is running + Version: {{.Version}} + +

+

+ Please check out the documentation + for the next steps. + + Please contact us + on discord if there is any issue. + +

+
+
+ + + diff --git a/agent/pkg/server/static/landing.go b/agent/pkg/server/static/landing.go new file mode 100644 index 0000000..5b49de3 --- /dev/null +++ b/agent/pkg/server/static/landing.go @@ -0,0 +1,34 @@ +package static + +import ( + "bytes" + _ "embed" + "html/template" + + "github.com/tensorchord/openmodelz/agent/pkg/version" +) + +//go:embed index.html +var htmlTemplate string + +type htmlStruct struct { + Version string +} + +func RenderLoadingPage() (*bytes.Buffer, error) { + tmpl, err := template.New("root").Parse(htmlTemplate) + if err != nil { + return nil, err + } + + data := htmlStruct{ + Version: version.GetAgentVersion(), + } + + var buffer bytes.Buffer + if err := tmpl.Execute(&buffer, data); err != nil { + return nil, err + } + + return &buffer, nil +} diff --git a/agent/pkg/version/version.go b/agent/pkg/version/version.go index 20deaef..ca631b7 100644 --- a/agent/pkg/version/version.go +++ b/agent/pkg/version/version.go @@ -63,8 +63,8 @@ func SetGitTagForE2ETest(tag string) { gitTag = tag } -// GetEnvdVersion gets Envd version information -func GetEnvdVersion() string { +// GetAgentVersion gets Envd version information +func GetAgentVersion() string { var versionStr string if gitCommit != "" && gitTag != "" && @@ -95,7 +95,7 @@ func GetEnvdVersion() string { // GetVersion returns the version information func GetVersion() Version { return Version{ - Version: GetEnvdVersion(), + Version: GetAgentVersion(), BuildDate: buildDate, GitCommit: gitCommit, GitTag: gitTag, diff --git a/autoscaler/pkg/autoscaler/scaler.go b/autoscaler/pkg/autoscaler/scaler.go index cd05c00..51d3598 100644 --- a/autoscaler/pkg/autoscaler/scaler.go +++ b/autoscaler/pkg/autoscaler/scaler.go @@ -306,7 +306,7 @@ func (s *Scaler) AutoScale(interval time.Duration) { "service": service, "replicas": totalReplicas, "expectedReplicas": expectedReplicas, - "availabelReplicas": availableReplicas, + "availableReplicas": availableReplicas, "currentLoad": lc.CurrentLoad, "targetLoad": 
targetLoad, "zeroDuration": zeroDuration, diff --git a/autoscaler/pkg/prom/prom.go b/autoscaler/pkg/prom/prom.go index 4436544..794c590 100644 --- a/autoscaler/pkg/prom/prom.go +++ b/autoscaler/pkg/prom/prom.go @@ -64,7 +64,7 @@ func (q PrometheusQuery) Fetch(query string) (*VectorQueryResponse, error) { unmarshalErr := json.Unmarshal(bytesOut, &values) if unmarshalErr != nil { - return nil, fmt.Errorf("error unmarshaling result: %s, '%s'", unmarshalErr, string(bytesOut)) + return nil, fmt.Errorf("error unmarshalling result: %s, '%s'", unmarshalErr, string(bytesOut)) } return &values, nil diff --git a/images/jupyter.png b/images/jupyter.png new file mode 100644 index 0000000..ea10f0f Binary files /dev/null and b/images/jupyter.png differ diff --git a/ingress-operator/LICENSE b/ingress-operator/LICENSE index 3804aa4..d53a9a9 100644 --- a/ingress-operator/LICENSE +++ b/ingress-operator/LICENSE @@ -1,5 +1,6 @@ MIT License +Copyright (c) 2023 TensorChord Inc. Copyright (c) 2017-2019 OpenFaaS Author(s) Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/ingress-operator/pkg/controller/v1/controller_test.go b/ingress-operator/pkg/controller/v1/controller_test.go index a38fd4b..b6ccee4 100644 --- a/ingress-operator/pkg/controller/v1/controller_test.go +++ b/ingress-operator/pkg/controller/v1/controller_test.go @@ -152,7 +152,7 @@ func Test_makeRules_Traefik_NestedPath_TrimsRegex_And_TrailingSlash(t *testing.T } } -func Test_makTLS(t *testing.T) { +func Test_makeTLS(t *testing.T) { cases := []struct { name string diff --git a/mdz/LICENSE b/mdz/LICENSE deleted file mode 100644 index e69de29..0000000 diff --git a/mdz/Makefile b/mdz/Makefile index 92c273b..2f73a9e 100644 --- a/mdz/Makefile +++ b/mdz/Makefile @@ -109,12 +109,12 @@ export GOFLAGS ?= -count=1 .DEFAULT_GOAL:=build -build: build-local ## Build the release version of envd +build: build-release ## Build the release version help: ## Display this help @awk 'BEGIN {FS = ":.*##"; 
printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z0-9_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) -debug: debug-local ## Build the debug version of envd +debug: debug-local ## Build the debug version # more info about `GOGC` env: https://github.com/golangci/golangci-lint#memory-usage-of-golangci-lint lint: $(GOLANGCI_LINT) ## Lint GO code @@ -138,11 +138,22 @@ swag-install: build-local: @for target in $(TARGETS); do \ - CGO_ENABLED=$(CGO_ENABLED) GOOS=$(GOOS) GOARCH=$(GOARCH) go build -tags $(DASHBOARD_BUILD) -trimpath -v -o $(OUTPUT_DIR)/$${target} \ + CGO_ENABLED=$(CGO_ENABLED) GOOS=$(GOOS) GOARCH=$(GOARCH) go build -trimpath -v -o $(OUTPUT_DIR)/$${target} \ -ldflags "-s -w -X $(ROOT)/pkg/version.version=$(VERSION) -X $(ROOT)/pkg/version.buildDate=$(BUILD_DATE) -X $(ROOT)/pkg/version.gitCommit=$(GIT_COMMIT) -X $(ROOT)/pkg/version.gitTreeState=$(GIT_TREE_STATE)" \ $(CMD_DIR)/$${target}; \ done +build-release: + @for target in $(TARGETS); do \ + CGO_ENABLED=$(CGO_ENABLED) go build -trimpath -o $(OUTPUT_DIR)/$${target} \ + -ldflags "-s -w -X $(ROOT)/pkg/version.version=$(VERSION) \ + -X $(ROOT)/pkg/version.buildDate=$(BUILD_DATE) \ + -X $(ROOT)/pkg/version.gitCommit=$(GIT_COMMIT) \ + -X $(ROOT)/pkg/version.gitTreeState=$(GIT_TREE_STATE) \ + -X $(ROOT)/pkg/version.gitTag=$(GIT_TAG)" \ + $(CMD_DIR)/$${target}; \ + done + # It is used by vscode to attach into the process. debug-local: @for target in $(TARGETS); do \ diff --git a/mdz/README.md b/mdz/README.md index a220607..6996112 100644 --- a/mdz/README.md +++ b/mdz/README.md @@ -13,7 +13,9 @@ CLI for OpenModelZ. 
## Installation -TODO +``` +pip install openmodelz +``` ## CLI Reference diff --git a/mdz/go.mod b/mdz/go.mod index c83dd38..b8fbbdb 100644 --- a/mdz/go.mod +++ b/mdz/go.mod @@ -70,6 +70,8 @@ require ( github.com/rivo/uniseg v0.2.0 // indirect github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/segmentio/analytics-go/v3 v3.2.1 // indirect + github.com/segmentio/backo-go v1.0.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/swaggo/swag v1.8.12 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect @@ -82,5 +84,6 @@ require ( golang.org/x/text v0.9.0 // indirect golang.org/x/tools v0.7.0 // indirect google.golang.org/protobuf v1.30.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/mdz/go.sum b/mdz/go.sum index 91cde3a..63fefe9 100644 --- a/mdz/go.sum +++ b/mdz/go.sum @@ -155,6 +155,10 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/segmentio/analytics-go/v3 v3.2.1 h1:G+f90zxtc1p9G+WigVyTR0xNfOghOGs/PYAlljLOyeg= +github.com/segmentio/analytics-go/v3 v3.2.1/go.mod h1:p8owAF8X+5o27jmvUognuXxdtqvSGtD0ZrfY2kcS9bE= +github.com/segmentio/backo-go v1.0.0 h1:kbOAtGJY2DqOR0jfRkYEorx/b18RgtepGtY3+Cpe6qA= +github.com/segmentio/backo-go v1.0.0/go.mod h1:kJ9mm9YmoWSkk+oQ+5Cj8DEoRCX2JT6As4kEtIIOp1M= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= @@ -271,6 +275,8 @@ gopkg.in/check.v1 
v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/mdz/pkg/cmd/deploy.go b/mdz/pkg/cmd/deploy.go index 443ec5f..6b8cfb3 100644 --- a/mdz/pkg/cmd/deploy.go +++ b/mdz/pkg/cmd/deploy.go @@ -9,6 +9,8 @@ import ( petname "github.com/dustinkirkland/golang-petname" "github.com/spf13/cobra" "github.com/tensorchord/openmodelz/agent/api/types" + "github.com/tensorchord/openmodelz/agent/client" + "github.com/tensorchord/openmodelz/mdz/pkg/telemetry" ) var ( @@ -21,6 +23,8 @@ var ( deployGPU int deployNodeLabel []string deployDetach bool + deployCommand string + deployProbePath string ) // deployCmd represents the deploy command @@ -55,9 +59,11 @@ func init() { deployCmd.Flags().StringVar(&deployName, "name", "", "Name of inference") deployCmd.Flags().StringSliceVarP(&deployNodeLabel, "node-labels", "l", []string{}, "Node labels") deployCmd.Flags().BoolVar(&deployDetach, "detach", false, "If set, the command returns immediately without waiting for the deployment to complete") + deployCmd.Flags().StringVar(&deployCommand, "command", "", "Command to run") + deployCmd.Flags().StringVar(&deployProbePath, "probe-path", "", "HTTP Health probe path") } -func waitForDeploymentReady(cmd *cobra.Command, client *Client, namespace, name string, interval time.Duration, timeoutSeconds int) error { +func 
waitForDeploymentReady(cmd *cobra.Command, client *client.Client, namespace, name string, interval time.Duration, timeoutSeconds int) error { timeout := time.After(time.Duration(timeoutSeconds) * time.Second) tick := time.Tick(interval) @@ -115,6 +121,13 @@ func commandDeploy(cmd *cobra.Command, args []string) error { }, } + if deployCommand != "" { + inf.Spec.Command = &deployCommand + } + if deployProbePath != "" { + inf.Spec.HTTPProbePath = &deployProbePath + } + if len(deployNodeLabel) > 0 { inf.Spec.Constraints = []string{} for _, label := range deployNodeLabel { @@ -132,6 +145,12 @@ func commandDeploy(cmd *cobra.Command, args []string) error { } } + telemetry.GetTelemetry().Record( + "deploy", + telemetry.AddField("GPU", deployGPU), + telemetry.AddField("FromZero", deployMinReplicas == 0), + ) + if _, err := agentClient.InferenceCreate( cmd.Context(), namespace, inf); err != nil { cmd.PrintErrf("Failed to create the inference: %s\n", errors.Cause(err)) diff --git a/mdz/pkg/cmd/list.go b/mdz/pkg/cmd/list.go index 60573f1..cd2d2c8 100644 --- a/mdz/pkg/cmd/list.go +++ b/mdz/pkg/cmd/list.go @@ -8,6 +8,7 @@ import ( "github.com/jedib0t/go-pretty/v6/table" "github.com/spf13/cobra" "github.com/tensorchord/openmodelz/agent/api/types" + "github.com/tensorchord/openmodelz/mdz/pkg/telemetry" ) const ( @@ -48,6 +49,7 @@ func init() { } func commandList(cmd *cobra.Command, args []string) error { + telemetry.GetTelemetry().Record("list") infs, err := agentClient.InferenceList(cmd.Context(), namespace) if err != nil { cmd.PrintErrf("Failed to list inferences: %v\n", err) diff --git a/mdz/pkg/cmd/logs.go b/mdz/pkg/cmd/logs.go index 53bcdd2..d0a3e30 100644 --- a/mdz/pkg/cmd/logs.go +++ b/mdz/pkg/cmd/logs.go @@ -9,6 +9,7 @@ var ( tail int since string end string + follow bool ) // logCmd represents the log command @@ -36,15 +37,16 @@ func init() { logsCmd.Flags().IntVarP(&tail, "tail", "t", 0, "Number of lines to show from the end of the logs") 
logsCmd.Flags().StringVarP(&since, "since", "s", "2006-01-02T15:04:05Z", "Show logs since timestamp (e.g. 2013-01-02T13:23:37Z) or relative (e.g. 42m for 42 minutes)") logsCmd.Flags().StringVarP(&end, "end", "e", "", "Only return logs before this timestamp (e.g. 2013-01-02T13:23:37Z) or relative (e.g. 42m for 42 minutes)") + logsCmd.Flags().BoolVarP(&follow, "follow", "f", false, "Follow log output") } func commandLogs(cmd *cobra.Command, args []string) error { - logs, err := agentClient.DeploymentLogGet(cmd.Context(), namespace, args[0], since, tail, end) + logStream, err := agentClient.DeploymentLogGet(cmd.Context(), namespace, args[0], since, tail, end, follow) if err != nil { cmd.PrintErrf("Failed to get logs: %s\n", err) return err } - for _, log := range logs { + for log := range logStream { cmd.Printf("%s: %s\n", log.Instance, log.Text) } return nil diff --git a/mdz/pkg/cmd/root.go b/mdz/pkg/cmd/root.go index 6c4f46b..82d02bb 100644 --- a/mdz/pkg/cmd/root.go +++ b/mdz/pkg/cmd/root.go @@ -9,13 +9,15 @@ import ( "github.com/spf13/cobra/doc" "github.com/tensorchord/openmodelz/agent/client" + "github.com/tensorchord/openmodelz/mdz/pkg/telemetry" ) var ( // Used for flags. - mdzURL string - namespace string - debug bool + mdzURL string + namespace string + debug bool + disableTelemetry bool agentClient *client.Client ) @@ -62,6 +64,8 @@ func init() { rootCmd.PersistentFlags().BoolVarP(&debug, "debug", "", false, "Enable debug logging") + rootCmd.PersistentFlags().BoolVarP(&disableTelemetry, "disable-telemetry", "", false, "Disable anonymous telemetry") + // Cobra also supports local flags, which will only run // when this action is called directly. rootCmd.AddGroup(&cobra.Group{ID: "basic", Title: "Basic Commands:"}) @@ -76,8 +80,8 @@ func commandInit(cmd *cobra.Command, args []string) error { if agentClient == nil { if mdzURL == "" { - // Checkout environment variable MDZ_AGENT. - mdzURL = os.Getenv("MDZ_AGENT") + // Checkout environment variable MDZ_URL. 
+ mdzURL = os.Getenv("MDZ_URL") } if mdzURL == "" { mdzURL = "http://localhost:80" @@ -89,6 +93,10 @@ func commandInit(cmd *cobra.Command, args []string) error { return err } } + + if err := telemetry.Initialize(!disableTelemetry); err != nil { + logrus.WithError(err).Debug("Failed to initialize telemetry") + } return nil } diff --git a/mdz/pkg/cmd/scale.go b/mdz/pkg/cmd/scale.go index 5d97b73..eb63700 100644 --- a/mdz/pkg/cmd/scale.go +++ b/mdz/pkg/cmd/scale.go @@ -2,6 +2,7 @@ package cmd import ( "github.com/spf13/cobra" + "github.com/tensorchord/openmodelz/mdz/pkg/telemetry" ) var ( @@ -73,6 +74,8 @@ func commandScale(cmd *cobra.Command, args []string) error { deployment.Spec.Scaling.TargetLoad = int32Ptr(targetInflightRequests) } + telemetry.GetTelemetry().Record("scale") + if _, err := agentClient.DeploymentUpdate(cmd.Context(), namespace, deployment); err != nil { cmd.PrintErrf("Failed to update deployment: %s\n", err) return err diff --git a/mdz/pkg/cmd/server.go b/mdz/pkg/cmd/server.go index 1ef51e3..bf078c5 100644 --- a/mdz/pkg/cmd/server.go +++ b/mdz/pkg/cmd/server.go @@ -7,8 +7,10 @@ import ( ) var ( - serverVerbose bool - serverPollingInterval time.Duration + serverVerbose bool + serverPollingInterval time.Duration = 3 * time.Second + serverRegistryMirrorName string + serverRegistryMirrorEndpoints []string ) // serverCmd represents the server command @@ -29,7 +31,6 @@ func init() { // Cobra supports Persistent Flags which will work for this command // and all subcommands, e.g.: serverCmd.PersistentFlags().BoolVarP(&serverVerbose, "verbose", "v", false, "Verbose output") - serverCmd.PersistentFlags().DurationVarP(&serverPollingInterval, "polling-interval", "p", 3*time.Second, "Polling interval") // Cobra supports local flags which will only run when this command // is called directly, e.g.: diff --git a/mdz/pkg/cmd/server_destroy.go b/mdz/pkg/cmd/server_destroy.go new file mode 100644 index 0000000..36792c9 --- /dev/null +++ 
b/mdz/pkg/cmd/server_destroy.go @@ -0,0 +1,50 @@ +package cmd + +import ( + "github.com/cockroachdb/errors" + "github.com/spf13/cobra" + + "github.com/tensorchord/openmodelz/mdz/pkg/server" +) + +// serverDestroyCmd represents the server destroy command +var serverDestroyCmd = &cobra.Command{ + Use: "destroy", + Short: "Destroy the cluster", + Long: `Destroy the cluster`, + Example: ` mdz server destroy`, + PreRunE: commandInitLog, + RunE: commandServerDestroy, +} + +func init() { + serverCmd.AddCommand(serverDestroyCmd) + + // Here you will define your flags and configuration settings. + + // Cobra supports Persistent Flags which will work for this command + // and all subcommands, e.g.: + + // Cobra supports local flags which will only run when this command + // is called directly, e.g.: +} + +func commandServerDestroy(cmd *cobra.Command, args []string) error { + engine, err := server.NewDestroy(server.Options{ + Verbose: serverVerbose, + OutputStream: cmd.ErrOrStderr(), + RetryInternal: serverPollingInterval, + }) + if err != nil { + cmd.PrintErrf("Failed to destroy the server: %s\n", errors.Cause(err)) + return err + } + + _, err = engine.Run() + if err != nil { + cmd.PrintErrf("Failed to destroy the server: %s\n", errors.Cause(err)) + return err + } + cmd.Printf("✅ Server destroyed\n") + return nil +} diff --git a/mdz/pkg/cmd/server_join.go b/mdz/pkg/cmd/server_join.go index f5c2316..f996a24 100644 --- a/mdz/pkg/cmd/server_join.go +++ b/mdz/pkg/cmd/server_join.go @@ -5,6 +5,7 @@ import ( "github.com/spf13/cobra" "github.com/tensorchord/openmodelz/mdz/pkg/server" + "github.com/tensorchord/openmodelz/mdz/pkg/telemetry" ) // serverJoinCmd represents the server join command @@ -28,6 +29,10 @@ func init() { // Cobra supports local flags which will only run when this command // is called directly, e.g.: + serverJoinCmd.Flags().StringVarP(&serverRegistryMirrorName, "mirror-name", "", + "docker.io", "Mirror domain name of the registry") + 
serverJoinCmd.Flags().StringArrayVarP(&serverRegistryMirrorEndpoints, "mirror-endpoints", "", + []string{}, "Mirror URL endpoints of the registry like `https://quay.io`") } func commandServerJoin(cmd *cobra.Command, args []string) error { @@ -36,12 +41,18 @@ func commandServerJoin(cmd *cobra.Command, args []string) error { OutputStream: cmd.ErrOrStderr(), RetryInternal: serverPollingInterval, ServerIP: args[0], + Mirror: server.Mirror{ + Name: serverRegistryMirrorName, + Endpoints: serverRegistryMirrorEndpoints, + }, }) if err != nil { - cmd.PrintErrf("Failed to join the cluster: %s\n", errors.Cause(err)) + cmd.PrintErrf("Failed to configure before join: %s\n", errors.Cause(err)) return err } + telemetry.GetTelemetry().Record("server join") + _, err = engine.Run() if err != nil { cmd.PrintErrf("Failed to join the cluster: %s\n", errors.Cause(err)) diff --git a/mdz/pkg/cmd/server_list.go b/mdz/pkg/cmd/server_list.go index 6bb0f1d..7f05c50 100644 --- a/mdz/pkg/cmd/server_list.go +++ b/mdz/pkg/cmd/server_list.go @@ -2,11 +2,16 @@ package cmd import ( "fmt" + "math" "github.com/cockroachdb/errors" "github.com/jedib0t/go-pretty/v6/table" + "github.com/sirupsen/logrus" "github.com/spf13/cobra" + "k8s.io/apimachinery/pkg/api/resource" + "github.com/tensorchord/openmodelz/agent/api/types" + "github.com/tensorchord/openmodelz/mdz/pkg/telemetry" ) var ( @@ -40,6 +45,7 @@ func init() { } func commandServerList(cmd *cobra.Command, args []string) error { + telemetry.GetTelemetry().Record("server list") servers, err := agentClient.ServerList(cmd.Context()) if err != nil { cmd.PrintErrf("Failed to list servers: %s\n", errors.Cause(err)) @@ -100,13 +106,41 @@ func labelsString(labels map[string]string) string { for k, v := range labels { res += fmt.Sprintf("%s=%s\n", k, v) } + if len(res) == 0 { + return res + } return res[:len(res)-1] } +func prettyByteSize(quantity string) (string, error) { + r, err := resource.ParseQuantity(quantity) + if err != nil { + return "", err + } + bf 
:= float64(r.Value()) + for _, unit := range []string{"", "Ki", "Mi", "Gi", "Ti"} { + if math.Abs(bf) < 1024.0 { + return fmt.Sprintf("%3.1f%sB", bf, unit), nil + } + bf /= 1024.0 + } + return fmt.Sprintf("%.1fPiB", bf), nil +} + func resourceListString(l types.ResourceList) string { - res := fmt.Sprintf("cpu: %s\nmem: %s", l["cpu"], l["memory"]) - if l["nvidia.com/gpu"] != "" { - res += fmt.Sprintf("\ngpu: %s", l["nvidia.com/gpu"]) + res := fmt.Sprintf("cpu: %s", l[types.ResourceCPU]) + memory, ok := l[types.ResourceMemory] + if ok { + prettyMem, err := prettyByteSize(string(memory)) + if err != nil { + logrus.Infof("failed to parse the memory quantity: %s", memory) + } else { + memory = types.Quantity(prettyMem) + } + } + res += fmt.Sprintf("\nmemory: %s", memory) + if l[types.ResourceGPU] != "" { + res += fmt.Sprintf("\ngpu: %s", l[types.ResourceGPU]) } return res } diff --git a/mdz/pkg/cmd/server_start.go b/mdz/pkg/cmd/server_start.go index 4332f65..468100f 100644 --- a/mdz/pkg/cmd/server_start.go +++ b/mdz/pkg/cmd/server_start.go @@ -9,11 +9,15 @@ import ( "github.com/tensorchord/openmodelz/agent/pkg/consts" "github.com/tensorchord/openmodelz/mdz/pkg/server" + "github.com/tensorchord/openmodelz/mdz/pkg/telemetry" + "github.com/tensorchord/openmodelz/mdz/pkg/version" ) var ( serverStartRuntime string serverStartDomain string = consts.Domain + serverStartVersion string + serverStartWithGPU bool ) // serverStartCmd represents the server start command @@ -40,6 +44,15 @@ func init() { // Cobra supports local flags which will only run when this command // is called directly, e.g.: // serverStartCmd.Flags().StringVarP(&serverStartRuntime, "runtime", "r", "k3s", "Runtime to use (k3s, docker) in the started server") + serverStartCmd.Flags().StringVarP(&serverStartVersion, "version", "", + version.HelmChartVersion, "Version of the server to start") + serverStartCmd.Flags().MarkHidden("version") + serverStartCmd.Flags().BoolVarP(&serverStartWithGPU, "force-gpu", "g", + 
false, "Start the server with GPU support (ignore the GPU detection)") + serverStartCmd.Flags().StringVarP(&serverRegistryMirrorName, "mirror-name", "", + "docker.io", "Mirror domain name of the registry") + serverStartCmd.Flags().StringArrayVarP(&serverRegistryMirrorEndpoints, "mirror-endpoints", "", + []string{}, "Mirror URL endpoints of the registry like `https://quay.io`") } func commandServerStart(cmd *cobra.Command, args []string) error { @@ -48,12 +61,24 @@ func commandServerStart(cmd *cobra.Command, args []string) error { domainWithSuffix := fmt.Sprintf("%s.%s", args[0], serverStartDomain) domain = &domainWithSuffix } + defer func(start time.Time) { + telemetry.GetTelemetry().Record( + "server start", + telemetry.AddField("duration", time.Since(start).Seconds()), + ) + }(time.Now()) engine, err := server.NewStart(server.Options{ Verbose: serverVerbose, Runtime: server.Runtime(serverStartRuntime), OutputStream: cmd.ErrOrStderr(), RetryInternal: serverPollingInterval, Domain: domain, + Version: serverStartVersion, + ForceGPU: serverStartWithGPU, + Mirror: server.Mirror{ + Name: serverRegistryMirrorName, + Endpoints: serverRegistryMirrorEndpoints, + }, }) if err != nil { cmd.PrintErrf("Failed to start the server: %s\n", errors.Cause(err)) @@ -83,6 +108,6 @@ func commandServerStart(cmd *cobra.Command, args []string) error { } cmd.Printf("🐳 The server is running at %s\n", mdzURL) cmd.Printf("🎉 You could set the environment variable to get started!\n\n") - cmd.Printf("export MDZ_AGENT=%s\n", mdzURL) + cmd.Printf("export MDZ_URL=%s\n", mdzURL) return nil } diff --git a/mdz/pkg/cmd/version.go b/mdz/pkg/cmd/version.go index 72df358..f6650a2 100644 --- a/mdz/pkg/cmd/version.go +++ b/mdz/pkg/cmd/version.go @@ -53,8 +53,6 @@ func printServerVersion(cmd *cobra.Command) error { } cmd.Println("Server:") - cmd.Printf(" Name: \t\t%s\n", info.Name) - cmd.Printf(" Orchestration: %s\n", info.Orchestration) cmd.Printf(" Version: \t%s\n", info.Version.Version) cmd.Printf(" Build 
Date: \t%s\n", info.Version.BuildDate) cmd.Printf(" Git Commit: \t%s\n", info.Version.GitCommit) diff --git a/mdz/pkg/server/agentd_run.go b/mdz/pkg/server/agentd_run.go index 2bef043..9c16080 100644 --- a/mdz/pkg/server/agentd_run.go +++ b/mdz/pkg/server/agentd_run.go @@ -11,7 +11,7 @@ type agentDRunStep struct { } // TODO(gaocegege): There is still a bug, thus it cannot be used actually. -// The process wil exit after the command returns. We need to put it in systemd. +// The process will exit after the command returns. We need to put it in systemd. func (s *agentDRunStep) Run() error { fmt.Fprintf(s.options.OutputStream, "🚧 Running the agent for docker runtime...\n") cmd := exec.Command("/bin/sh", "-c", "mdz local-agent &") diff --git a/mdz/pkg/server/engine.go b/mdz/pkg/server/engine.go index ace9d7e..f92ccec 100644 --- a/mdz/pkg/server/engine.go +++ b/mdz/pkg/server/engine.go @@ -14,9 +14,21 @@ type Options struct { Verbose bool OutputStream io.Writer Runtime Runtime + Mirror Mirror RetryInternal time.Duration ServerIP string Domain *string + Version string + ForceGPU bool +} + +type Mirror struct { + Name string + Endpoints []string +} + +func (m *Mirror) Configured() bool { + return m.Name != "" && len(m.Endpoints) > 0 } type Runtime string @@ -36,6 +48,9 @@ type Result struct { } func NewStart(o Options) (*Engine, error) { + if o.Verbose { + fmt.Fprintf(o.OutputStream, "Starting the server with config: %+v\n", o) + } var engine *Engine switch o.Runtime { case RuntimeDocker: @@ -52,6 +67,9 @@ func NewStart(o Options) (*Engine, error) { options: o, Steps: []Step{ // Install k3s and related tools. + &k3sPrepare{ + options: o, + }, &k3sInstallStep{ options: o, }, @@ -82,11 +100,22 @@ func NewStop(o Options) (*Engine, error) { }, nil } +func NewDestroy(o Options) (*Engine, error) { + return &Engine{ + options: o, + Steps: []Step{ + // Destroy all k3s and related tools. 
+ &k3sDestroyAllStep{ + options: o, + }, + }, + }, nil +} + func NewJoin(o Options) (*Engine, error) { return &Engine{ options: o, Steps: []Step{ - // Kill all k3s and related tools. &k3sJoinStep{ options: o, }, @@ -118,6 +147,12 @@ func (e *Engine) Run() (*Result, error) { MDZURL: fmt.Sprintf("http://%s", *e.options.Domain), }, nil } + // Get the server IP. + if resultDomain != "" { + return &Result{ + MDZURL: fmt.Sprintf("http://%s", resultDomain), + }, nil + } return &Result{ MDZURL: fmt.Sprintf("http://0.0.0.0:%d", AgentPort), }, nil diff --git a/mdz/pkg/server/gpu_install.go b/mdz/pkg/server/gpu_install.go index 8580c79..a3f2786 100644 --- a/mdz/pkg/server/gpu_install.go +++ b/mdz/pkg/server/gpu_install.go @@ -4,7 +4,10 @@ import ( _ "embed" "fmt" "io" + "os" "os/exec" + "path/filepath" + "regexp" "syscall" ) @@ -16,7 +19,44 @@ type gpuInstallStep struct { options Options } +// check if the Nvidia Toolkit is installed on the host +func (s *gpuInstallStep) hasNvidiaToolkit() bool { + locations := []string{ + "/usr/local/nvidia/toolkit", + "/usr/bin", + } + binaryNames := []string{ + "nvidia-container-runtime", + "nvidia-container-runtime-experimental", + } + for _, location := range locations { + for _, name := range binaryNames { + path := filepath.Join(location, name) + if _, err := os.Stat(path); err == nil { + return true + } + } + } + return false +} + +func (s *gpuInstallStep) hasNvidiaDevice() bool { + output, err := exec.Command("/bin/sh", "-c", "lspci").Output() + if err != nil { + return false + } + regexNvidia := regexp.MustCompile("(?i)nvidia") + return regexNvidia.Match(output) +} + func (s *gpuInstallStep) Run() error { + if !s.options.ForceGPU { + // detect GPU + if !(s.hasNvidiaDevice() || s.hasNvidiaToolkit()) { + fmt.Fprintf(s.options.OutputStream, "🚧 Nvidia Toolkit is missing, skip the GPU initialization.\n") + return nil + } + } fmt.Fprintf(s.options.OutputStream, "🚧 Initializing the GPU resource...\n") cmd := exec.Command("/bin/sh", "-c", 
"sudo k3s kubectl apply -f -") @@ -41,6 +81,8 @@ func (s *gpuInstallStep) Run() error { if _, err := io.WriteString(stdin, gpuYamlContent); err != nil { return err } + // Close the input stream to finish the pipe. Then the command will use the + // input from the pipe to start the next process. stdin.Close() if err := cmd.Wait(); err != nil { diff --git a/mdz/pkg/server/k3s_destroy.go b/mdz/pkg/server/k3s_destroy.go new file mode 100644 index 0000000..38158b3 --- /dev/null +++ b/mdz/pkg/server/k3s_destroy.go @@ -0,0 +1,38 @@ +package server + +import ( + "fmt" + "os/exec" + "syscall" +) + +// k3sDestroyAllStep installs k3s and related tools. +type k3sDestroyAllStep struct { + options Options +} + +func (s *k3sDestroyAllStep) Run() error { + fmt.Fprintf(s.options.OutputStream, "🚧 Destroy the OpenModelz Cluster...\n") + // TODO(gaocegege): Embed the script into the binary. + cmd := exec.Command("/bin/sh", "-c", "/usr/local/bin/k3s-uninstall.sh") + cmd.SysProcAttr = &syscall.SysProcAttr{ + Pdeathsig: syscall.SIGKILL, + } + if s.options.Verbose { + cmd.Stderr = s.options.OutputStream + cmd.Stdout = s.options.OutputStream + } else { + cmd.Stdout = nil + cmd.Stderr = nil + } + err := cmd.Run() + if err != nil { + return err + } + + return nil +} + +func (s *k3sDestroyAllStep) Verify() error { + return nil +} diff --git a/mdz/pkg/server/k3s_install.go b/mdz/pkg/server/k3s_install.go index 0144b2e..6a823ec 100644 --- a/mdz/pkg/server/k3s_install.go +++ b/mdz/pkg/server/k3s_install.go @@ -57,6 +57,8 @@ func (s *k3sInstallStep) Run() error { if _, err := io.WriteString(stdin, bashContent); err != nil { return err } + // Close the input stream to finish the pipe. Then the command will use the + // input from the pipe to start the next process. 
stdin.Close() fmt.Fprintf(s.options.OutputStream, "🚧 Waiting for the server to be created...\n") diff --git a/mdz/pkg/server/k3s_join.go b/mdz/pkg/server/k3s_join.go index a5c79ae..253365f 100644 --- a/mdz/pkg/server/k3s_join.go +++ b/mdz/pkg/server/k3s_join.go @@ -49,6 +49,8 @@ func (s *k3sJoinStep) Run() error { if _, err := io.WriteString(stdin, bashContent); err != nil { return err } + // Close the input stream to finish the pipe. Then the command will use the + // input from the pipe to start the next process. stdin.Close() fmt.Fprintf(s.options.OutputStream, "🚧 Waiting for the server to be ready...\n") diff --git a/mdz/pkg/server/k3s_killall.go b/mdz/pkg/server/k3s_killall.go index 43f2614..b797d31 100644 --- a/mdz/pkg/server/k3s_killall.go +++ b/mdz/pkg/server/k3s_killall.go @@ -12,7 +12,7 @@ type k3sKillAllStep struct { } func (s *k3sKillAllStep) Run() error { - fmt.Fprintf(s.options.OutputStream, "🚧 Stopping all the processes...\n") + fmt.Fprintf(s.options.OutputStream, "🚧 Stopping the OpenModelz Cluster...\n") // TODO(gaocegege): Embed the script into the binary. cmd := exec.Command("/bin/sh", "-c", "/usr/local/bin/k3s-killall.sh") cmd.SysProcAttr = &syscall.SysProcAttr{ diff --git a/mdz/pkg/server/k3s_prepare.go b/mdz/pkg/server/k3s_prepare.go new file mode 100644 index 0000000..2e3d77d --- /dev/null +++ b/mdz/pkg/server/k3s_prepare.go @@ -0,0 +1,66 @@ +package server + +import ( + _ "embed" + "fmt" + "os/exec" + "path/filepath" + "strings" + "syscall" + "text/template" +) + +//go:embed registries.yaml +var registriesContent string + +const mirrorPath = "/etc/rancher/k3s" +const mirrorFile = "registries.yaml" + +// k3sPrepare installs everything required by k3s.
+type k3sPrepare struct { + options Options +} + +func (s *k3sPrepare) Run() error { + if !s.options.Mirror.Configured() { + return nil + } + fmt.Fprintf(s.options.OutputStream, "🚧 Configure the mirror...\n") + + tmpl, err := template.New("registries").Parse(registriesContent) + if err != nil { + panic(err) + } + buf := strings.Builder{} + err = tmpl.Execute(&buf, s.options.Mirror) + if err != nil { + panic(err) + } + + cmd := exec.Command("/bin/sh", "-c", fmt.Sprintf( + "sudo mkdir -p %s && sudo tee %s > /dev/null << EOF\n%s\nEOF", + mirrorPath, + filepath.Join(mirrorPath, mirrorFile), + buf.String(), + )) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Pdeathsig: syscall.SIGKILL, + } + if s.options.Verbose { + cmd.Stderr = s.options.OutputStream + cmd.Stdout = s.options.OutputStream + } else { + cmd.Stdout = nil + cmd.Stderr = nil + } + err = cmd.Run() + if err != nil { + return err + } + + return nil +} + +func (s *k3sPrepare) Verify() error { + return nil +} diff --git a/mdz/pkg/server/nginx_install.go b/mdz/pkg/server/nginx_install.go index 5841749..651b4e4 100644 --- a/mdz/pkg/server/nginx_install.go +++ b/mdz/pkg/server/nginx_install.go @@ -42,6 +42,8 @@ func (s *nginxInstallStep) Run() error { if _, err := io.WriteString(stdin, nginxYamlContent); err != nil { return err } + // Close the input stream to finish the pipe. Then the command will use the + // input from the pipe to start the next process. 
stdin.Close() if err := cmd.Wait(); err != nil { diff --git a/mdz/pkg/server/openmodelz.yaml b/mdz/pkg/server/openmodelz.yaml index 32135f8..94e148c 100644 --- a/mdz/pkg/server/openmodelz.yaml +++ b/mdz/pkg/server/openmodelz.yaml @@ -14,7 +14,7 @@ spec: chart: openmodelz repo: https://tensorchord.github.io/openmodelz-charts targetNamespace: openmodelz - version: 0.0.8 + version: {{.Version}} set: valuesContent: |- fullnameOverride: openmodelz diff --git a/mdz/pkg/server/openmodelz_install.go b/mdz/pkg/server/openmodelz_install.go index 97030ca..f5a4e0a 100644 --- a/mdz/pkg/server/openmodelz_install.go +++ b/mdz/pkg/server/openmodelz_install.go @@ -6,6 +6,7 @@ import ( "html/template" "io" "os/exec" + "regexp" "strings" "syscall" @@ -15,6 +16,8 @@ import ( //go:embed openmodelz.yaml var yamlContent string +var resultDomain string + // openModelZInstallStep installs the OpenModelZ deployments. type openModelZInstallStep struct { options Options @@ -47,7 +50,10 @@ func (s *openModelZInstallStep) Run() error { variables := struct { Domain string IpToDomain bool - }{} + Version string + }{ + Version: s.options.Version, + } if s.options.Domain != nil { variables.Domain = *s.options.Domain variables.IpToDomain = false @@ -66,9 +72,14 @@ func (s *openModelZInstallStep) Run() error { panic(err) } + logrus.WithField("variables", variables). + Debugf("Deploying OpenModelZ with the following variables") + if _, err := io.WriteString(stdin, buf.String()); err != nil { return err } + // Close the input stream to finish the pipe. Then the command will use the + // input from the pipe to start the next process. 
stdin.Close() fmt.Fprintf(s.options.OutputStream, "🚧 Waiting for the server to be ready...\n") @@ -90,8 +101,17 @@ func (s *openModelZInstallStep) Verify() error { return err } logrus.Debugf("kubectl get cmd output: %s\n", output) - if len(output) == 0 { + if len(output) <= 4 { return fmt.Errorf("cannot get the ingress ip: output is empty") } + + // Get the IP from the output like this: `[{"ip":"192.168.71.93"}]` + re := regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}`) + found := re.MatchString(string(output)) + if !found { + return fmt.Errorf("cannot get the ingress ip") + } + + resultDomain = re.FindString(string(output)) return nil } diff --git a/mdz/pkg/server/registries.yaml b/mdz/pkg/server/registries.yaml new file mode 100644 index 0000000..e073e1d --- /dev/null +++ b/mdz/pkg/server/registries.yaml @@ -0,0 +1,4 @@ +mirrors: + {{ .Name }}: + endpoint: + {{ range $endpoint := .Endpoints }}- "{{ $endpoint }}"{{ end }} diff --git a/mdz/pkg/telemetry/telemetry.go b/mdz/pkg/telemetry/telemetry.go new file mode 100644 index 0000000..580a9bd --- /dev/null +++ b/mdz/pkg/telemetry/telemetry.go @@ -0,0 +1,166 @@ +package telemetry + +import ( + "io" + "os" + "path/filepath" + "runtime" + "sync" + "time" + + "github.com/cockroachdb/errors" + "github.com/google/uuid" + segmentio "github.com/segmentio/analytics-go/v3" + "github.com/sirupsen/logrus" + + "github.com/tensorchord/openmodelz/mdz/pkg/version" +) + +type TelemetryField func(*segmentio.Properties) + +type Telemetry interface { + Record(command string, args ...TelemetryField) +} + +type defaultTelemetry struct { + client segmentio.Client + uid string + enabled bool +} + +const telemetryToken = "65WHA9bxCNX74K3HjgplMOmsio9LkYSI" + +var ( + once sync.Once + telemetry *defaultTelemetry + telemetryConfigFile string +) + +func init() { + home, err := os.UserHomeDir() + if err != nil { + panic(err) + } + telemetryConfigFile = filepath.Join(home, ".config",
"openmodelz", "telemetry") +} + +func GetTelemetry() Telemetry { + return telemetry +} + +func Initialize(enabled bool) error { + once.Do(func() { + client, err := segmentio.NewWithConfig(telemetryToken, segmentio.Config{ + BatchSize: 1, + }) + if err != nil { + panic(err) + } + telemetry = &defaultTelemetry{ + client: client, + enabled: enabled, + } + }) + return telemetry.init() +} + +func (t *defaultTelemetry) init() error { + if !t.enabled { + return nil + } + // detect if the config file already exists + _, err := os.Stat(telemetryConfigFile) + if err != nil { + if !os.IsNotExist(err) { + return errors.Wrap(err, "failed to stat telemetry config file") + } + t.uid = uuid.New().String() + return t.dumpConfig() + } + if err = t.loadConfig(); err != nil { + return errors.Wrap(err, "failed to load telemetry config") + } + t.Identify() + return nil +} + +func (t *defaultTelemetry) dumpConfig() error { + if err := os.MkdirAll(filepath.Dir(telemetryConfigFile), os.ModeDir|0700); err != nil { + return errors.Wrap(err, "failed to create telemetry config directory") + } + file, err := os.Create(telemetryConfigFile) + if err != nil { + return errors.Wrap(err, "failed to create telemetry config file") + } + defer file.Close() + _, err = file.WriteString(t.uid) + if err != nil { + return errors.Wrap(err, "failed to write telemetry config file") + } + return nil +} + +func (t *defaultTelemetry) loadConfig() error { + file, err := os.Open(telemetryConfigFile) + if err != nil { + return errors.Wrap(err, "failed to open telemetry config file") + } + defer file.Close() + uid, err := io.ReadAll(file) + if err != nil { + return errors.Wrap(err, "failed to read telemetry config file") + } + t.uid = string(uid) + return nil +} + +func (t *defaultTelemetry) Identify() { + if !t.enabled { + return + } + v := version.GetOpenModelzVersion() + if err := t.client.Enqueue(segmentio.Identify{ + UserId: t.uid, + Context: &segmentio.Context{ + OS: segmentio.OSInfo{ + Name: runtime.GOOS,
Version: runtime.GOARCH, + }, + App: segmentio.AppInfo{ + Name: "openmodelz", + Version: v, + }, + }, + Timestamp: time.Now(), + Traits: segmentio.NewTraits(), + }); err != nil { + logrus.WithError(err).Debug("failed to identify user") + return + } +} + +func AddField(name string, value interface{}) TelemetryField { + return func(p *segmentio.Properties) { + p.Set(name, value) + } +} + +func (t *defaultTelemetry) Record(command string, fields ...TelemetryField) { + if !t.enabled { + return + } + logrus.WithField("UID", t.uid).WithField("command", command).Debug("send telemetry") + track := segmentio.Track{ + UserId: t.uid, + Event: command, + Properties: segmentio.NewProperties(), + } + for _, field := range fields { + field(&track.Properties) + } + if err := t.client.Enqueue(track); err != nil { + logrus.WithError(err).Debug("failed to send telemetry") + } + // make sure the msg can be sent out + t.client.Close() +} diff --git a/mdz/pkg/version/version.go b/mdz/pkg/version/version.go index 20deaef..5bd3562 100644 --- a/mdz/pkg/version/version.go +++ b/mdz/pkg/version/version.go @@ -30,6 +30,8 @@ var ( // Package is filled at linking time Package = "github.com/tensorchord/openmodelz/agent" + HelmChartVersion = "0.0.13" + // Revision is filled with the VCS (e.g. git) revision being used to build // the program at linking time. 
Revision = "" @@ -42,7 +44,7 @@ var ( developmentFlag = "false" ) -// Version contains envd version information +// Version contains OpenModelz version information type Version struct { Version string BuildDate string @@ -63,8 +65,8 @@ func SetGitTagForE2ETest(tag string) { gitTag = tag } -// GetEnvdVersion gets Envd version information -func GetEnvdVersion() string { +// GetOpenModelzVersion gets OpenModelz version information +func GetOpenModelzVersion() string { var versionStr string if gitCommit != "" && gitTag != "" && @@ -95,7 +97,7 @@ func GetEnvdVersion() string { // GetVersion returns the version information func GetVersion() Version { return Version{ - Version: GetEnvdVersion(), + Version: GetOpenModelzVersion(), BuildDate: buildDate, GitCommit: gitCommit, GitTag: gitTag, @@ -126,5 +128,5 @@ func UserAgent() string { version = matches[0][1] + "-dev" } - return "envd/" + version + return "modelz/" + version } diff --git a/modelzetes/LICENSE b/modelzetes/LICENSE index b2b0490..c820691 100644 --- a/modelzetes/LICENSE +++ b/modelzetes/LICENSE @@ -1,5 +1,6 @@ MIT License +Copyright (c) 2023 TensorChord Inc. 
Copyright (c) 2020 OpenFaaS Author(s) Copyright (c) 2017 Alex Ellis diff --git a/modelzetes/pkg/app/root.go b/modelzetes/pkg/app/root.go index bfef40d..e98bf5d 100644 --- a/modelzetes/pkg/app/root.go +++ b/modelzetes/pkg/app/root.go @@ -21,7 +21,7 @@ const ( flagDebug = "debug" // metrics - flageMetricsServerPort = "metrics-server-port" + flagMetricsServerPort = "metrics-server-port" // kubernetes flagMasterURL = "master-url" @@ -73,7 +73,7 @@ func New() App { EnvVars: []string{"DEBUG"}, }, &cli.IntFlag{ - Name: flageMetricsServerPort, + Name: flagMetricsServerPort, Value: 8081, Usage: "port to listen on", EnvVars: []string{"MODELZETES_SERVER_PORT"}, diff --git a/modelzetes/pkg/k8s/proxy.go b/modelzetes/pkg/k8s/proxy.go index c05b24d..e5662f8 100644 --- a/modelzetes/pkg/k8s/proxy.go +++ b/modelzetes/pkg/k8s/proxy.go @@ -109,6 +109,6 @@ func (l *FunctionLookup) verifyNamespace(name string) error { if name != "kube-system" { return nil } - // ToDo use global namepace parse and validation + // ToDo use global namespace parse and validation return fmt.Errorf("namespace not allowed") } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f333999 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,51 @@ +[project] +name = "openmodelz" +description = "Simplify machine learning deployment for any environments." 
+readme = "README.md" +authors = [ + {name = "TensorChord", email = "modelz@tensorchord.ai"}, +] +license = {text = "Apache-2.0"} +keywords = ["machine learning", "deep learning", "model serving"] +dynamic = ["version"] +requires-python = ">=2.7" +classifiers = [ + "Environment :: GPU", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Software Development :: Build Tools", +] + +[project.urls] +homepage = "https://modelz.ai/" +documentation = "https://docs.open.modelz.ai/" +repository = "https://github.com/tensorchord/openmodelz" +changelog = "https://github.com/tensorchord/openmodelz/releases" + +[tool.cibuildwheel] +build-frontend = "build" +archs = ["auto64"] +skip = "pp*" # skip pypy +before-all = "" +environment = { PIP_NO_CLEAN="yes" } +before-build = "ls -la mdz/bin" # help to debug + +[project.optional-dependencies] + +[project.scripts] + +[build-system] +requires = ["setuptools>=45", "wheel", "setuptools_scm"] +build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] +write_to = "mdz/_version.py" diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..eab70e2 --- /dev/null +++ b/setup.py @@ -0,0 +1,64 @@ +import os +import subprocess +import shlex +from wheel.bdist_wheel import bdist_wheel +from setuptools import setup, find_packages, Extension +from setuptools.command.build_ext import build_ext +from setuptools_scm import get_version + + +with open("README.md", "r", encoding="utf-8") as f: + readme = f.read() + +class bdist_wheel_universal(bdist_wheel): + def 
get_tag(self): + *_, plat = super().get_tag() + return "py2.py3", "none", plat + + +def build_if_not_exist(): + if os.path.isfile("mdz/bin/mdz"): + return + version = get_version() + print(f"build mdz from source ({version})") + subprocess.call(["make", "mdz"]) + errno = subprocess.call(shlex.split( + f"make build-release GIT_TAG={version}" + ), cwd="mdz") + assert errno == 0, f"mdz build failed with code {errno}" + + +class ModelzExtension(Extension): + """A custom extension to define the OpenModelz extension.""" + + +class ModelzBuildExt(build_ext): + def build_extension(self, ext: Extension) -> None: + if not isinstance(ext, ModelzExtension): + return super().build_extension(ext) + + build_if_not_exist() + + +setup( + name="openmodelz", + use_scm_version=True, + description="Simplify machine learning deployment for any environments.", + long_description=readme, + long_description_content_type="text/markdown", + url="https://github.com/tensorchord/openmodelz", + license="Apache License 2.0", + author="TensorChord", + author_email="modelz@tensorchord.ai", + packages=find_packages("mdz"), + include_package_data=True, + data_files=[("bin", ["mdz/bin/mdz"])], + zip_safe=False, + ext_modules=[ + ModelzExtension(name="mdz", sources=["mdz/*"]), + ], + cmdclass=dict( + build_ext=ModelzBuildExt, + bdist_wheel=bdist_wheel_universal, + ), +) diff --git a/typos.toml b/typos.toml new file mode 100644 index 0000000..b4298e3 --- /dev/null +++ b/typos.toml @@ -0,0 +1,5 @@ +[files] +extend-exclude = ["CHANGELOG.md", "go.mod", "go.sum"] +[default.extend-words] +requestor = "requestor" +ba = "ba"