大数据

SeaTunnel 安装部署

一、官网下载安装包
https://seatunnel.apache.org/docs/2.3.11/start-v2/locally/deployment

# Pin the release once so the download URL and the archive name always agree.
export version="2.3.11"
# Fetch the binary distribution from the Apache archive.
wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz"
# Unpack into the current directory.
tar -xzvf "apache-seatunnel-${version}-bin.tar.gz"
# Later steps use paths relative to the extracted directory (e.g. bin/install-plugin.sh),
# so switch into it before continuing.
cd "apache-seatunnel-${version}"

上述命令会解压到当前目录；如需解压到指定目录，可为 tar 增加 -C <目标目录> 参数。后续命令均在解压得到的 apache-seatunnel-2.3.11 目录下执行。

二、下载组件

1、下载依赖组件

# The script path is relative, so run this from the extracted SeaTunnel directory.
# The argument 2.3.11 matches the release version downloaded above.
sh bin/install-plugin.sh 2.3.11

默认会下载 config/plugin_config 中列出的全部连接器组件。

如果只需要部分组件，可以编辑 config/plugin_config 文件，只保留需要的连接器名称，例如：

--seatunnel-connectors--
connector-fake
connector-console
--end--

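2、补充 lib 依赖 jar 包：将连接器运行所需的第三方 jar（例如下文示例中 com.mysql.cj.jdbc.Driver 对应的 MySQL JDBC 驱动）放入 SeaTunnel 安装目录下的 lib/ 目录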

3、如果网络较差，可以使用 Docker 直接拉取完整镜像

# Pull the SeaTunnel 2.3.11 image through the docker.m.daocloud.io registry mirror.
sudo docker pull docker.m.daocloud.io/apache/seatunnel:2.3.11

Docker 镜像中已包含全部连接器组件和 lib 依赖。
可以先用 Docker 启动一个 cdc 任务，再从运行中的容器里拷贝出完整的 /opt/seatunnel 目录（例如使用 docker cp <容器ID>:/opt/seatunnel <本地目录>）。
Docker 运行命令：

# Run the job in local mode inside a throw-away container (--rm).
# -v mounts the host directory D://conf at /config; -c points at the job config inside that mount.
# The image reference must match the one pulled from the mirror above; a bare
# "apache/seatunnel:2.3.11" is not present locally and would be fetched from Docker Hub.
docker run --rm -it -v D://conf:/config docker.m.daocloud.io/apache/seatunnel:2.3.11 ./bin/seatunnel.sh -m local -c /config/tidb2doris_conf

三、示例
1、cdc

# TiDB -> Doris CDC example job.
env {
  parallelism = 1
  # Run as a streaming job.
  job.mode = "STREAMING"
  # Checkpoint interval; presumably milliseconds (i.e. 5 s) - confirm against the SeaTunnel env docs.
  checkpoint.interval = 5000
}

source {
  # This is an example source plugin **only for test and demonstrate the feature source plugin**
  TiDB-CDC {
    # Name given to this source's output.
    plugin_output = "products_tidb_cdc"
    # JDBC endpoint of the TiDB server (replace the xxx placeholders).
    base-url = "jdbc:mysql://192.168.0.xxx:4000/xxx"
    driver = "com.mysql.cj.jdbc.Driver"
    tikv.grpc.timeout_in_ms = 20000
    # Address of the PD node (replace the xxx placeholder).
    pd-addresses = "192.168.0.xxx:2379"
    username = "root"
    password = "xxx"
    # Database and table to capture changes from.
    database-name = "xxx"
    table-name = "xxx"
  }
}

# No transforms are applied in this example.
transform {
}

sink {
  Doris {
    # Doris FE node address (replace the xxx placeholder).
    fenodes = "192.168.0.xxx:8030"
    # Quoted like every other string value in this file, so that names
    # containing special characters remain valid HOCON.
    username = "xxx"
    password = "xxx"
    # Target database and table.
    database = "xxx"
    table = "xxx"
    # Prefix for the load labels generated by this job.
    sink.label-prefix = "test-cdc"
    sink.enable-2pc = "true"
    # Let delete events from the CDC source be applied to the target table.
    # NOTE(review): presumably requires batch delete to be enabled on the Doris table - confirm.
    sink.enable-delete = "true"
    # Options handed to the Doris load: rows are sent as JSON, one object per line.
    doris.config {
      format = "json"
      read_json_by_line = "true"
    }
  }
}

2、批任务

# JDBC -> Doris batch example job.
env {
  parallelism = 4
  # Run as a batch job.
  job.mode = "BATCH"
}

source {
  Jdbc {
    # JDBC endpoint of the source database (replace the xxx placeholders).
    url = "jdbc:mysql://192.168.0.xxx:4000/xxx?serverTimezone=GMT%2b8&useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true"
    driver = "com.mysql.cj.jdbc.Driver"
    connection_check_timeout_sec = 100
    user = "root"
    password = "xxx"
    # Query whose result set is loaded into Doris.
    query = "select * from xxx"
  }
}

sink {
  Doris {
    # Doris FE node address; masked with the same placeholder style used by the other examples.
    fenodes = "192.168.0.xxx:8030"
    # Quoted like every other string value in this file, so that names
    # containing special characters remain valid HOCON.
    username = "xxx"
    password = "xxx"
    # Target database and table.
    database = "xxx"
    table = "xxx"
    # Use a prefix of its own instead of reusing the CDC example's "test-cdc":
    # with 2PC enabled the label prefix has to be unique across jobs.
    sink.label-prefix = "test-batch"
    sink.enable-2pc = "true"
    sink.enable-delete = "true"
    # Options handed to the Doris load: rows are sent as JSON, one object per line.
    doris.config {
      format = "json"
      read_json_by_line = "true"
    }
  }
}