docs: integrate Kafka to open source GreptimeDB #1420

Merged · 7 commits · Jan 6, 2025
4 changes: 3 additions & 1 deletion blog/release-0-7-2.md
@@ -1,12 +1,14 @@
---
date: 2024-04-08
keywords: [release notes, greptimedb, 0.7.2]
description: GreptimeDB 0.7.2 release notes.
---

# v0.7.2

Release date: April 08, 2024

This is a patch release, containing a critial bug fix to avoid wrongly delete data files ([#3635](https://github.com/GreptimeTeam/greptimedb/pull/3635)).
This is a patch release, containing a critical bug fix to avoid wrongly deleting data files ([#3635](https://github.com/GreptimeTeam/greptimedb/pull/3635)).

**It's highly recommended to upgrade to this version if you're using v0.7.**

5 changes: 5 additions & 0 deletions docs/greptimecloud/integrations/kafka.md
@@ -1,4 +1,4 @@
Check failure on line 1 in docs/greptimecloud/integrations/kafka.md (GitHub Actions / check-links): Found relative link: "[Ingest Data via Kafka](http://docs.greptime.com/nightly/user-guide/ingest-data/for-observerbility/kafka)". All links must start with https://

---
keywords: [Kafka, Vector, logs, metrics, data ingestion]
description: Guide on using Kafka with Vector to ingest logs and metrics data into GreptimeCloud.
---
@@ -63,3 +63,8 @@
password = "<password>"
tls = {}
```

## Reference

For detailed information on the data ingestion process, please refer to the [Ingest Data via Kafka](http://docs.greptime.com/nightly/user-guide/ingest-data/for-observerbility/kafka) guide.

8 changes: 8 additions & 0 deletions docs/user-guide/ingest-data/for-iot/kafka.md
@@ -0,0 +1,8 @@
---
keywords: [Kafka, Data Ingestion]
description: Write data from Kafka to GreptimeDB.
---

# Kafka

Please refer to the [Kafka documentation](/user-guide/ingest-data/for-observerbility/kafka.md) for instructions on how to ingest data from Kafka into GreptimeDB.
172 changes: 172 additions & 0 deletions docs/user-guide/ingest-data/for-observerbility/kafka.md
@@ -0,0 +1,172 @@
---
keywords: [Kafka, data ingestion, observability, metrics, logs, JSON logs, text logs, Vector, InfluxDB line protocol]
description: Learn how to ingest observability data from Kafka into GreptimeDB using Vector. This guide covers metrics and logs ingestion, including JSON and text log formats, with detailed configuration examples.
---

# Kafka

If you are using Kafka or a Kafka-compatible message queue to transport observability data, you can ingest it into GreptimeDB directly.

This guide uses Vector as the tool to transport data from Kafka to GreptimeDB.

## Metrics

When ingesting metrics from Kafka into GreptimeDB, messages should be formatted in InfluxDB line protocol. For example:

```txt
census,location=klamath,scientist=anderson bees=23 1566086400000000000
```
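
In this example, `census` is the measurement (which GreptimeDB stores as the table name), `location` and `scientist` are tags, `bees=23` is a field, and the trailing number is a nanosecond-precision timestamp.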

Then configure Vector to use the `influxdb` decoding codec to process these messages.

```toml
[sources.metrics_mq]
# Specifies that the source type is Kafka
type = "kafka"
# The consumer group ID for Kafka
group_id = "vector0"
# The list of Kafka topics to consume messages from
topics = ["test_metric_topic"]
# The address of the Kafka broker to connect to
bootstrap_servers = "kafka:9092"
# The `influxdb` codec indicates that messages are expected to be in InfluxDB line protocol format.
decoding.codec = "influxdb"

[sinks.metrics_in]
inputs = ["metrics_mq"]
# Specifies that the sink type is `greptimedb_metrics`
type = "greptimedb_metrics"
# The endpoint of the GreptimeDB server.
# Replace <host> with the actual hostname or IP address.
endpoint = "<host>:4001"
dbname = "<dbname>"
username = "<username>"
password = "<password>"
tls = {}
```
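
With this configuration, Vector consumes the line protocol messages from the `test_metric_topic` topic and forwards them to GreptimeDB's gRPC endpoint (port 4001 by default).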

For details on how InfluxDB line protocol metrics are mapped to GreptimeDB data, please refer to the [Data Model](/user-guide/ingest-data/for-iot/influxdb-line-protocol.md#data-model) section in the InfluxDB line protocol documentation.


## Logs

Developers commonly work with two types of logs: JSON logs and plain text logs.
Consider the following examples sent from Kafka.

A plain text log:

```txt
127.0.0.1 - - [25/May/2024:20:16:37 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
```

Or a JSON log:

```json
{
"timestamp": "2024-12-23T10:00:00Z",
"level": "INFO",
"message": "Service started"
}
```

GreptimeDB transforms these logs into structured data with multiple columns and automatically creates the necessary tables.
A pipeline processes the logs into structured data before ingestion into GreptimeDB. Different log formats require different [Pipelines](/user-guide/logs/quick-start.md#write-logs-by-pipeline) for parsing. See the following sections for details.

### Logs with JSON format

For logs in JSON format (e.g., `{"timestamp": "2024-12-23T10:00:00Z", "level": "INFO", "message": "Service started"}`),
you can use the built-in [`greptime_identity`](/user-guide/logs/manage-pipelines.md#greptime_identity) pipeline for direct ingestion.
This pipeline creates columns automatically based on the fields in your JSON log message.

Simply configure Vector's `transforms` settings to parse the JSON message and use the `greptime_identity` pipeline as shown in the following example:

```toml
[sources.logs_in]
type = "kafka"
# The consumer group ID for Kafka
group_id = "vector0"
# The list of Kafka topics to consume messages from
topics = ["test_log_topic"]
# The address of the Kafka broker to connect to
bootstrap_servers = "kafka:9092"

# Parse the raw log message as JSON
[transforms.logs_json]
type = "remap"
inputs = ["logs_in"]
source = '''
. = parse_json!(.message)
'''

[sinks.logs_out]
# Specifies that this sink will receive data from the `logs_json` transform
inputs = ["logs_json"]
# Specifies that the sink type is `greptimedb_logs`
type = "greptimedb_logs"
# The endpoint of the GreptimeDB server
endpoint = "http://<host>:4000"
compression = "gzip"
# Replace <dbname>, <username>, and <password> with the actual values
dbname = "<dbname>"
username = "<username>"
password = "<password>"
# The table name in GreptimeDB, if it doesn't exist, it will be created automatically
table = "demo_logs"
# Use the built-in `greptime_identity` pipeline
pipeline_name = "greptime_identity"
```
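
Each key in the parsed JSON (`timestamp`, `level`, and `message` in the example above) becomes a column in the `demo_logs` table, which is created automatically on the first write.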

### Logs with text format

For logs in text format, such as the access log format below, you'll need to create a custom pipeline to parse them:

```txt
127.0.0.1 - - [25/May/2024:20:16:37 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
```

#### Create a pipeline

To create a custom pipeline,
please refer to the [Create Pipeline](/user-guide/logs/quick-start.md#create-a-pipeline)
and [Pipeline Configuration](/user-guide/logs/pipeline-config.md) documentation for detailed instructions.
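
As a starting point, a pipeline for the access log format above could use a `dissect` processor to split the line into named fields and a `date` processor to parse the timestamp. The following is a minimal sketch only — the field names and column types are assumptions, so adjust them to your own log format using the pipeline configuration reference:

```yaml
# Illustrative sketch of a pipeline for the access log example above.
# Field names and types are assumptions; adapt them to your logs.
processors:
  - dissect:
      fields:
        - message
      patterns:
        - '%{ip} - - [%{timestamp}] "%{http_method} %{http_path} %{http_version}" %{status} %{size} "%{referer}" "%{user_agent}"'
      ignore_missing: true
  - date:
      fields:
        - timestamp
      formats:
        - "%d/%b/%Y:%H:%M:%S %z"

transform:
  - fields:
      - ip
      - http_method
      - http_path
      - http_version
      - referer
      - user_agent
    type: string
  - fields:
      - status
      - size
    type: uint32
  - fields:
      - timestamp
    type: time
    index: timestamp
```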

#### Ingest data

After creating the pipeline, set its name in the `pipeline_name` field of the Vector configuration file.

```toml
# sample.toml
[sources.log_mq]
# Specifies that the source type is Kafka
type = "kafka"
# The consumer group ID for Kafka
group_id = "vector0"
# The list of Kafka topics to consume messages from
topics = ["test_log_topic"]
# The address of the Kafka broker to connect to
bootstrap_servers = "kafka:9092"

[sinks.sink_greptime_logs]
# Specifies that the sink type is `greptimedb_logs`
type = "greptimedb_logs"
# Specifies that this sink will receive data from the `log_mq` source
inputs = [ "log_mq" ]
# Use `gzip` compression to save bandwidth
compression = "gzip"
# The endpoint of the GreptimeDB server
# Replace <host> with the actual hostname or IP address
endpoint = "http://<host>:4000"
dbname = "<dbname>"
username = "<username>"
password = "<password>"
# The table name in GreptimeDB, if it doesn't exist, it will be created automatically
table = "demo_logs"
# The custom pipeline name that you created
pipeline_name = "your_custom_pipeline"
```
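
Note that the custom pipeline must be registered with GreptimeDB (for example through the HTTP pipeline API described in the pipeline management documentation) before Vector can reference it by name. Once it is in place, start Vector with this configuration file (e.g. `vector --config sample.toml`); messages from `test_log_topic` will then be parsed by your pipeline and written to the `demo_logs` table.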

## Demo

For a runnable demo of data transformation and ingestion, please refer to the [Kafka Ingestion Demo](https://github.com/GreptimeTeam/demo-scene/tree/main/kafka-ingestion).

10 changes: 9 additions & 1 deletion docs/user-guide/ingest-data/for-observerbility/vector.md
@@ -11,7 +11,7 @@ sink. With vector, you can ingest metrics data from various sources, including
Prometheus, OpenTelemetry, StatsD and many more.
GreptimeDB can be used as a Vector Sink component to receive metrics.

## Collect metrics
## Collect host metrics

### Configuration

@@ -60,3 +60,11 @@ The following rules are used when storing Vector metrics into GreptimeDB:
- For AggregatedSummary metrics, the values of each percentile are stored in the `pxx` column, where xx is the percentile, and the `sum/count` columns are also stored;
- For Sketch metrics, the values of each percentile are stored in the `pxx` column, where xx is the percentile, and the `min/max/avg/sum` columns are also stored;

## Collect metrics with InfluxDB line protocol format

Vector can collect metrics in the InfluxDB line protocol format and send them to GreptimeDB. For more information, refer to the [Kafka guide](/user-guide/ingest-data/for-observerbility/kafka.md#metrics).

## Collect logs

Vector can also collect logs and send them to GreptimeDB. For more details, refer to the [Kafka guide](/user-guide/ingest-data/for-observerbility/kafka.md#logs).

10 changes: 10 additions & 0 deletions docs/user-guide/integrations/kafka.md
@@ -0,0 +1,10 @@
---
keywords: [Kafka, data ingestion, observability, metrics, logs]
description: Learn how to ingest observability data from Kafka into GreptimeDB using Vector.
---

# Kafka

Vector can be used as a tool to transport data from Kafka to GreptimeDB.
For more information, please refer to the [Ingest Data via Kafka](/user-guide/ingest-data/for-observerbility/kafka.md) guide.

2 changes: 1 addition & 1 deletion docusaurus.config.ts
@@ -207,7 +207,7 @@ const config: Config = {
prism: {
theme: prismThemes.github,
darkTheme: prismThemes.dracula,
additionalLanguages: ['java'],
additionalLanguages: ['java', 'toml'],
},
algolia: algoliaMap[locale]
//,
@@ -1,4 +1,4 @@
Check failure on line 1 in i18n/zh/docusaurus-plugin-content-docs/current/greptimecloud/integrations/kafka.md (GitHub Actions / check-links): Found relative link: "[通过 Kafka 写入数据](http://docs.greptime.cn/nightly/user-guide/ingest-data/for-observerbility/kafka)". All links must start with https://

---
keywords: [Kafka, data transfer, log data, metrics data, configuration examples]
description: Describes how to use Kafka to transfer data to GreptimeCloud, with configuration examples for logs and metrics.
---
@@ -60,3 +60,8 @@
password = "<password>"
tls = {}
```

## Reference

Please refer to [Ingest Data via Kafka](http://docs.greptime.cn/nightly/user-guide/ingest-data/for-observerbility/kafka) for detailed information on the data ingestion process.

@@ -0,0 +1,9 @@
---
keywords: [Kafka, data ingestion]
description: Write data from Kafka to GreptimeDB.
---

# Kafka

Please refer to the [Kafka documentation](/user-guide/ingest-data/for-observerbility/kafka.md) for how to write data from Kafka into GreptimeDB.
