Unverified commit d09efa36 authored by ZackYoung, committed by GitHub

Optimize cdc SQLSinkBuilder KafkaSinkBuilder, filter to process (#806)

* Optimize cdc SQLSinkBuilder KafkaSinkBuilder, filter to process

* change "_" to "."

* change "db" to config.getSchemaFieldName()

* change "tableMap key" to table.getSchemaTableName()
parent ebe5b6af
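The gist of the change, repeated for each Flink client module below: instead of building one filter()/map() chain per table, KafkaSinkBuilder now parses each CDC record once and routes it through a single process() operator with per-table side outputs, the lookup key switched from an underscore-joined name to table.getSchemaTableName() ("schema.table") and the schema field read from config.getSchemaFieldName(). A minimal sketch of the resulting pattern, consolidated from the diff (config, mapOperator, getSinkTableName, schemaFieldName and objectMapper belong to the surrounding KafkaSinkBuilder; the null check stands in for the try/catch in the actual code):

    Map<Table, OutputTag<String>> tagMap = new HashMap<>();
    Map<String, Table> tableMap = new HashMap<>();
    for (Schema schema : config.getSchemaList()) {
        for (Table table : schema.getTables()) {
            // Anonymous subclass so the OutputTag keeps its String type information.
            OutputTag<String> tag = new OutputTag<String>(getSinkTableName(table)) {};
            tagMap.put(table, tag);
            tableMap.put(table.getSchemaTableName(), table); // key is now "schema.table"
        }
    }
    SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
        @Override
        public void processElement(Map map, Context ctx, Collector<String> out) throws Exception {
            LinkedHashMap source = (LinkedHashMap) map.get("source");
            String key = source.get(schemaFieldName) + "." + source.get("table");
            String json = objectMapper.writeValueAsString(map);
            OutputTag<String> tag = tagMap.get(tableMap.get(key));
            if (tag != null) {
                ctx.output(tag, json);  // routed to the matching table's side output
            } else {
                out.collect(json);      // unknown tables fall through to the main output
            }
        }
    });
    tagMap.forEach((table, tag) -> {
        String topic = getSinkTableName(table);
        process.getSideOutput(tag).rebalance()
                .addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"), topic, new SimpleStringSchema()))
                .name(topic);
    });

Each side output feeds its own Kafka producer (or KafkaSink in the 1.14 modules), so every record is inspected once instead of flowing through one filter per table.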
......@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
}
});
}
protected DataStream<Map> shunt(
SingleOutputStreamOperator<Map> processOperator,
Table table,
OutputTag<Map> tag) {
return processOperator.getSideOutput(tag);
}
protected DataStream<RowData> buildRowData(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
......
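The shunt() helper added above is a thin wrapper around getSideOutput(); a hypothetical call from a SinkBuilder subclass might look like the following (processOperator and the tag construction are illustrative assumptions, not part of the diff; note that the table argument is not used inside shunt() itself):

    // Hypothetical usage: pull one table's records out of the shared process() operator
    // via its side-output tag.
    OutputTag<Map> tag = new OutputTag<Map>(table.getSchemaTableName()) {};
    DataStream<Map> tableStream = shunt(processOperator, table, tag);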
......@@ -20,31 +20,32 @@
package com.dlink.cdc.kafka;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
/**
* MysqlCDCBuilder
*
......@@ -89,44 +90,47 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
if (Asserts.isNotNullString(config.getSink().get("topic"))) {
dataStreamSource.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
dataStreamSource.addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
} else {
Map<Table, OutputTag<String>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
@Override
public Map map(String value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(value, Map.class);
}
});
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String tableName = table.getName();
final String schemaName = table.getSchema();
SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
@Override
public boolean filter(Map value) throws Exception {
LinkedHashMap source = (LinkedHashMap) value.get("source");
return tableName.equals(source.get("table").toString())
&& schemaName.equals(source.get(schemaFieldName).toString());
}
});
SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
@Override
public String map(Map value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(value);
}
});
stringOperator.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
getSinkTableName(table),
new SimpleStringSchema()));
String sinkTableName = getSinkTableName(table);
OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
@Override
public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
String result = objectMapper.writeValueAsString(map);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<String> outputTag = tagMap.get(table);
ctx.output(outputTag, result);
} catch (Exception e) {
out.collect(objectMapper.writeValueAsString(map));
}
}
});
tagMap.forEach((k, v) -> {
String topic = getSinkTableName(k);
process.getSideOutput(v).rebalance().addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
topic, new SimpleStringSchema())).name(topic);
});
}
}
return dataStreamSource;
......
......@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
}
});
}
protected DataStream<Map> shunt(
SingleOutputStreamOperator<Map> processOperator,
Table table,
OutputTag<Map> tag) {
return processOperator.getSideOutput(tag);
}
protected DataStream<RowData> buildRowData(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
......
......@@ -20,31 +20,32 @@
package com.dlink.cdc.kafka;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
/**
* MysqlCDCBuilder
*
......@@ -89,44 +90,48 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
if (Asserts.isNotNullString(config.getSink().get("topic"))) {
dataStreamSource.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
dataStreamSource.addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
} else {
Map<Table, OutputTag<String>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
@Override
public Map map(String value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(value, Map.class);
}
});
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String tableName = table.getName();
final String schemaName = table.getSchema();
SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
@Override
public boolean filter(Map value) throws Exception {
LinkedHashMap source = (LinkedHashMap) value.get("source");
return tableName.equals(source.get("table").toString())
&& schemaName.equals(source.get(schemaFieldName).toString());
}
});
SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
@Override
public String map(Map value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(value);
}
});
stringOperator.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
getSinkTableName(table),
new SimpleStringSchema()));
String sinkTableName = getSinkTableName(table);
OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
@Override
public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
String result = objectMapper.writeValueAsString(map);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<String> outputTag = tagMap.get(table);
ctx.output(outputTag, result);
} catch (Exception e) {
out.collect(objectMapper.writeValueAsString(map));
}
}
});
tagMap.forEach((k, v) -> {
String topic = getSinkTableName(k);
process.getSideOutput(v).rebalance().addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
topic, new SimpleStringSchema())).name(topic);
});
}
}
return dataStreamSource;
......
......@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
}
});
}
protected DataStream<Map> shunt(
SingleOutputStreamOperator<Map> processOperator,
Table table,
OutputTag<Map> tag) {
return processOperator.getSideOutput(tag);
}
protected DataStream<RowData> buildRowData(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
......
......@@ -20,31 +20,32 @@
package com.dlink.cdc.kafka;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
/**
* MysqlCDCBuilder
*
......@@ -79,44 +80,48 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
if (Asserts.isNotNullString(config.getSink().get("topic"))) {
dataStreamSource.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
dataStreamSource.addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
} else {
Map<Table, OutputTag<String>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
@Override
public Map map(String value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(value, Map.class);
}
});
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String tableName = table.getName();
final String schemaName = table.getSchema();
SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
@Override
public boolean filter(Map value) throws Exception {
LinkedHashMap source = (LinkedHashMap) value.get("source");
return tableName.equals(source.get("table").toString())
&& schemaName.equals(source.get(schemaFieldName).toString());
}
});
SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
@Override
public String map(Map value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(value);
}
});
stringOperator.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
getSinkTableName(table),
new SimpleStringSchema()));
String sinkTableName = getSinkTableName(table);
OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
@Override
public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
String result = objectMapper.writeValueAsString(map);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<String> outputTag = tagMap.get(table);
ctx.output(outputTag, result);
} catch (Exception e) {
out.collect(objectMapper.writeValueAsString(map));
}
}
});
tagMap.forEach((k, v) -> {
String topic = getSinkTableName(k);
process.getSideOutput(v).rebalance().addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
topic, new SimpleStringSchema())).name(topic);
});
}
}
return dataStreamSource;
......
......@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
}
});
}
protected DataStream<Map> shunt(
SingleOutputStreamOperator<Map> processOperator,
Table table,
OutputTag<Map> tag) {
return processOperator.getSideOutput(tag);
}
protected DataStream<RowData> buildRowData(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
......
......@@ -20,9 +20,16 @@
package com.dlink.cdc.kafka;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
......@@ -30,22 +37,17 @@ import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
/**
* MysqlCDCBuilder
*
......@@ -90,54 +92,60 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
if (Asserts.isNotNullString(config.getSink().get("topic"))) {
dataStreamSource.sinkTo(KafkaSink.<String>builder()
.setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(config.getSink().get("topic"))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.build());
KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(config.getSink().get("topic"))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
.build();
dataStreamSource.sinkTo(kafkaSink);
} else {
Map<Table, OutputTag<String>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
String sinkTableName = getSinkTableName(table);
OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
@Override
public Map map(String value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(value, Map.class);
public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
String result = objectMapper.writeValueAsString(map);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<String> outputTag = tagMap.get(table);
ctx.output(outputTag, result);
} catch (Exception e) {
out.collect(objectMapper.writeValueAsString(map));
}
}
});
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String tableName = table.getName();
final String schemaName = table.getSchema();
SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
@Override
public boolean filter(Map value) throws Exception {
LinkedHashMap source = (LinkedHashMap) value.get("source");
return tableName.equals(source.get("table").toString())
&& schemaName.equals(source.get(schemaFieldName).toString());
}
});
SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
@Override
public String map(Map value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(value);
}
});
stringOperator.sinkTo(KafkaSink.<String>builder()
.setBootstrapServers(config.getSink().get("brokers"))
tagMap.forEach((k, v) -> {
String topic = getSinkTableName(k);
KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(getSinkTableName(table))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
.setTopic(topic)
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.build());
}
}
.setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
.build();
process.getSideOutput(v).rebalance().sinkTo(kafkaSink).name(topic);
});
}
}
return dataStreamSource;
......
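One detail worth noting in the KafkaSink-based modules above: the delivery guarantee is now derived from the job's checkpointing mode by enum-constant name. A small standalone sketch of that mapping (an illustration, not project code; it assumes checkpointing is enabled with EXACTLY_ONCE or AT_LEAST_ONCE, the two CheckpointingMode constants whose names also exist on DeliveryGuarantee):

    import org.apache.flink.connector.base.DeliveryGuarantee;
    import org.apache.flink.streaming.api.CheckpointingMode;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    public class DeliveryGuaranteeMapping {
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            // Assumption: checkpointing is configured; even without it, getCheckpointingMode()
            // returns the default mode rather than failing.
            env.enableCheckpointing(10_000L, CheckpointingMode.EXACTLY_ONCE);
            DeliveryGuarantee guarantee =
                    DeliveryGuarantee.valueOf(env.getCheckpointingMode().name());
            System.out.println(guarantee); // EXACTLY_ONCE
        }
    }

If the mode name had no matching DeliveryGuarantee constant, valueOf() would throw IllegalArgumentException, so the mapping relies on the two enums keeping those names aligned.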
......@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
}
});
}
protected DataStream<Map> shunt(
SingleOutputStreamOperator<Map> processOperator,
Table table,
OutputTag<Map> tag) {
return processOperator.getSideOutput(tag);
}
protected DataStream<RowData> buildRowData(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
......
......@@ -20,9 +20,16 @@
package com.dlink.cdc.kafka;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
......@@ -30,22 +37,17 @@ import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
/**
* MysqlCDCBuilder
*
......@@ -90,54 +92,60 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
if (Asserts.isNotNullString(config.getSink().get("topic"))) {
dataStreamSource.sinkTo(KafkaSink.<String>builder()
.setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(config.getSink().get("topic"))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.build());
KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(config.getSink().get("topic"))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
.build();
dataStreamSource.sinkTo(kafkaSink);
} else {
Map<Table, OutputTag<String>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
String sinkTableName = getSinkTableName(table);
OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
@Override
public Map map(String value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(value, Map.class);
public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
String result = objectMapper.writeValueAsString(map);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<String> outputTag = tagMap.get(table);
ctx.output(outputTag, result);
} catch (Exception e) {
out.collect(objectMapper.writeValueAsString(map));
}
}
});
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String tableName = table.getName();
final String schemaName = table.getSchema();
SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
@Override
public boolean filter(Map value) throws Exception {
LinkedHashMap source = (LinkedHashMap) value.get("source");
return tableName.equals(source.get("table").toString())
&& schemaName.equals(source.get(schemaFieldName).toString());
}
});
SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
@Override
public String map(Map value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(value);
}
});
stringOperator.sinkTo(KafkaSink.<String>builder()
.setBootstrapServers(config.getSink().get("brokers"))
tagMap.forEach((k, v) -> {
String topic = getSinkTableName(k);
KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(getSinkTableName(table))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
.setTopic(topic)
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.build());
}
}
.setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
.build();
process.getSideOutput(v).rebalance().sinkTo(kafkaSink).name(topic);
});
}
}
return dataStreamSource;
......