Unverified Commit d09efa36 authored by ZackYoung, committed by GitHub

Optimize cdc SQLSinkBuilder KafkaSinkBuilder, filter to process (#806)

* Optimize cdc SQLSinkBuilder and KafkaSinkBuilder, filter to process

* change "_" to "."

* change "db" to config.getSchemaFieldName()

* change "tableMap key" to table.getSchemaTableName()
parent ebe5b6af
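
The change repeated across the client modules below replaces the per-table filter/map chains with a single ProcessFunction that routes every CDC record to a per-table side output. The following is a minimal, self-contained sketch of that side-output routing pattern, not the project's code: the class name, the hard-coded "db"/table names, the record shape, the plain Jackson ObjectMapper (the project uses the flink-shaded one), and the use of print() in place of a Kafka sink are all illustrative assumptions.

// SideOutputRoutingSketch.java - illustrative only, assumed names and record shape
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import java.util.HashMap;
import java.util.Map;

public class SideOutputRoutingSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        ObjectMapper objectMapper = new ObjectMapper();

        // One side-output tag per "schema.table" known up front (from the CDC config).
        Map<String, OutputTag<String>> tagMap = new HashMap<>();
        tagMap.put("db.orders", new OutputTag<String>("orders") { });
        tagMap.put("db.users", new OutputTag<String>("users") { });

        // Parse each JSON change record into a Map once, up front.
        SingleOutputStreamOperator<Map> mapOperator = env
            .fromElements(
                "{\"source\":{\"db\":\"db\",\"table\":\"orders\"},\"after\":{\"id\":1}}",
                "{\"source\":{\"db\":\"db\",\"table\":\"users\"},\"after\":{\"id\":2}}")
            .map(value -> objectMapper.readValue(value, Map.class))
            .returns(Map.class);

        // Single pass over the stream: look up the tag by "<schema>.<table>" and emit
        // the serialized record to that side output; unknown tables fall through.
        SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
            @Override
            public void processElement(Map value, Context ctx, Collector<String> out) throws Exception {
                Map source = (Map) value.get("source");
                String key = source.get("db") + "." + source.get("table");
                OutputTag<String> tag = tagMap.get(key);
                String json = objectMapper.writeValueAsString(value);
                if (tag != null) {
                    ctx.output(tag, json);
                } else {
                    out.collect(json);
                }
            }
        });

        // Each side output would normally feed its own Kafka topic; print() stands in here.
        tagMap.forEach((key, tag) -> process.getSideOutput(tag).print(key));

        env.execute("side-output routing sketch");
    }
}

One pass over the stream replaces N filter operators, and records whose table is not registered still reach the main output, which mirrors the catch/collect fallback used in the commit.
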
@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
 import org.apache.flink.table.types.logical.*;
 import org.apache.flink.types.RowKind;
 import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
             }
         });
     }
+    protected DataStream<Map> shunt(
+        SingleOutputStreamOperator<Map> processOperator,
+        Table table,
+        OutputTag<Map> tag) {
+        return processOperator.getSideOutput(tag);
+    }
     protected DataStream<RowData> buildRowData(
         SingleOutputStreamOperator<Map> filterOperator,
         List<String> columnNameList,
...
@@ -20,31 +20,32 @@
 package com.dlink.cdc.kafka;
-import org.apache.flink.api.common.functions.FilterFunction;
-import org.apache.flink.api.common.functions.MapFunction;
+import com.dlink.assertion.Asserts;
+import com.dlink.cdc.AbstractSinkBuilder;
+import com.dlink.cdc.CDCBuilder;
+import com.dlink.cdc.SinkBuilder;
+import com.dlink.executor.CustomTableEnvironment;
+import com.dlink.model.FlinkCDCConfig;
+import com.dlink.model.Schema;
+import com.dlink.model.Table;
 import org.apache.flink.api.common.serialization.SimpleStringSchema;
 import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.datastream.DataStreamSource;
 import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.ProcessFunction;
 import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import com.dlink.assertion.Asserts;
-import com.dlink.cdc.AbstractSinkBuilder;
-import com.dlink.cdc.CDCBuilder;
-import com.dlink.cdc.SinkBuilder;
-import com.dlink.executor.CustomTableEnvironment;
-import com.dlink.model.FlinkCDCConfig;
-import com.dlink.model.Schema;
-import com.dlink.model.Table;
 /**
  * MysqlCDCBuilder
  *
@@ -89,44 +90,47 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
         CustomTableEnvironment customTableEnvironment,
         DataStreamSource<String> dataStreamSource) {
         if (Asserts.isNotNullString(config.getSink().get("topic"))) {
-            dataStreamSource.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
+            dataStreamSource.addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
                 config.getSink().get("topic"),
                 new SimpleStringSchema()));
         } else {
+            Map<Table, OutputTag<String>> tagMap = new HashMap<>();
+            Map<String, Table> tableMap = new HashMap<>();
+            ObjectMapper objectMapper = new ObjectMapper();
+            SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x, Map.class)).returns(Map.class);
             final List<Schema> schemaList = config.getSchemaList();
             final String schemaFieldName = config.getSchemaFieldName();
             if (Asserts.isNotNullCollection(schemaList)) {
-                SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
-                    @Override
-                    public Map map(String value) throws Exception {
-                        ObjectMapper objectMapper = new ObjectMapper();
-                        return objectMapper.readValue(value, Map.class);
-                    }
-                });
                 for (Schema schema : schemaList) {
                     for (Table table : schema.getTables()) {
-                        final String tableName = table.getName();
-                        final String schemaName = table.getSchema();
-                        SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
-                            @Override
-                            public boolean filter(Map value) throws Exception {
-                                LinkedHashMap source = (LinkedHashMap) value.get("source");
-                                return tableName.equals(source.get("table").toString())
-                                    && schemaName.equals(source.get(schemaFieldName).toString());
-                            }
-                        });
-                        SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
-                            @Override
-                            public String map(Map value) throws Exception {
-                                ObjectMapper objectMapper = new ObjectMapper();
-                                return objectMapper.writeValueAsString(value);
-                            }
-                        });
-                        stringOperator.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
-                            getSinkTableName(table),
-                            new SimpleStringSchema()));
+                        String sinkTableName = getSinkTableName(table);
+                        OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
+                        };
+                        tagMap.put(table, outputTag);
+                        tableMap.put(table.getSchemaTableName(), table);
                     }
                 }
+                SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
+                    @Override
+                    public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
+                        LinkedHashMap source = (LinkedHashMap) map.get("source");
+                        try {
+                            String result = objectMapper.writeValueAsString(map);
+                            Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
+                            OutputTag<String> outputTag = tagMap.get(table);
+                            ctx.output(outputTag, result);
+                        } catch (Exception e) {
+                            out.collect(objectMapper.writeValueAsString(map));
+                        }
+                    }
+                });
+                tagMap.forEach((k, v) -> {
+                    String topic = getSinkTableName(k);
+                    process.getSideOutput(v).rebalance().addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
+                        topic, new SimpleStringSchema())).name(topic);
+                });
             }
         }
         return dataStreamSource;
...
@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
 import org.apache.flink.table.types.logical.*;
 import org.apache.flink.types.RowKind;
 import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
             }
         });
     }
+    protected DataStream<Map> shunt(
+        SingleOutputStreamOperator<Map> processOperator,
+        Table table,
+        OutputTag<Map> tag) {
+        return processOperator.getSideOutput(tag);
+    }
     protected DataStream<RowData> buildRowData(
         SingleOutputStreamOperator<Map> filterOperator,
         List<String> columnNameList,
...
@@ -20,31 +20,32 @@
 package com.dlink.cdc.kafka;
-import org.apache.flink.api.common.functions.FilterFunction;
-import org.apache.flink.api.common.functions.MapFunction;
+import com.dlink.assertion.Asserts;
+import com.dlink.cdc.AbstractSinkBuilder;
+import com.dlink.cdc.CDCBuilder;
+import com.dlink.cdc.SinkBuilder;
+import com.dlink.executor.CustomTableEnvironment;
+import com.dlink.model.FlinkCDCConfig;
+import com.dlink.model.Schema;
+import com.dlink.model.Table;
 import org.apache.flink.api.common.serialization.SimpleStringSchema;
 import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.datastream.DataStreamSource;
 import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.ProcessFunction;
 import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import com.dlink.assertion.Asserts;
-import com.dlink.cdc.AbstractSinkBuilder;
-import com.dlink.cdc.CDCBuilder;
-import com.dlink.cdc.SinkBuilder;
-import com.dlink.executor.CustomTableEnvironment;
-import com.dlink.model.FlinkCDCConfig;
-import com.dlink.model.Schema;
-import com.dlink.model.Table;
 /**
  * MysqlCDCBuilder
  *
@@ -89,44 +90,48 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
         CustomTableEnvironment customTableEnvironment,
         DataStreamSource<String> dataStreamSource) {
         if (Asserts.isNotNullString(config.getSink().get("topic"))) {
-            dataStreamSource.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
+            dataStreamSource.addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
                 config.getSink().get("topic"),
                 new SimpleStringSchema()));
         } else {
+            Map<Table, OutputTag<String>> tagMap = new HashMap<>();
+            Map<String, Table> tableMap = new HashMap<>();
+            ObjectMapper objectMapper = new ObjectMapper();
+            SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x, Map.class)).returns(Map.class);
             final List<Schema> schemaList = config.getSchemaList();
             final String schemaFieldName = config.getSchemaFieldName();
             if (Asserts.isNotNullCollection(schemaList)) {
-                SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
-                    @Override
-                    public Map map(String value) throws Exception {
-                        ObjectMapper objectMapper = new ObjectMapper();
-                        return objectMapper.readValue(value, Map.class);
-                    }
-                });
                 for (Schema schema : schemaList) {
                     for (Table table : schema.getTables()) {
-                        final String tableName = table.getName();
-                        final String schemaName = table.getSchema();
-                        SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
-                            @Override
-                            public boolean filter(Map value) throws Exception {
-                                LinkedHashMap source = (LinkedHashMap) value.get("source");
-                                return tableName.equals(source.get("table").toString())
-                                    && schemaName.equals(source.get(schemaFieldName).toString());
-                            }
-                        });
-                        SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
-                            @Override
-                            public String map(Map value) throws Exception {
-                                ObjectMapper objectMapper = new ObjectMapper();
-                                return objectMapper.writeValueAsString(value);
-                            }
-                        });
-                        stringOperator.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
-                            getSinkTableName(table),
-                            new SimpleStringSchema()));
+                        String sinkTableName = getSinkTableName(table);
+                        OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
+                        };
+                        tagMap.put(table, outputTag);
+                        tableMap.put(table.getSchemaTableName(), table);
                     }
                 }
+                SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
+                    @Override
+                    public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
+                        LinkedHashMap source = (LinkedHashMap) map.get("source");
+                        try {
+                            String result = objectMapper.writeValueAsString(map);
+                            Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
+                            OutputTag<String> outputTag = tagMap.get(table);
+                            ctx.output(outputTag, result);
+                        } catch (Exception e) {
+                            out.collect(objectMapper.writeValueAsString(map));
+                        }
+                    }
+                });
+                tagMap.forEach((k, v) -> {
+                    String topic = getSinkTableName(k);
+                    process.getSideOutput(v).rebalance().addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
+                        topic, new SimpleStringSchema())).name(topic);
+                });
            }
         }
         return dataStreamSource;
...
@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
 import org.apache.flink.table.types.logical.*;
 import org.apache.flink.types.RowKind;
 import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
             }
         });
     }
+    protected DataStream<Map> shunt(
+        SingleOutputStreamOperator<Map> processOperator,
+        Table table,
+        OutputTag<Map> tag) {
+        return processOperator.getSideOutput(tag);
+    }
     protected DataStream<RowData> buildRowData(
         SingleOutputStreamOperator<Map> filterOperator,
         List<String> columnNameList,
...
@@ -20,31 +20,32 @@
 package com.dlink.cdc.kafka;
-import org.apache.flink.api.common.functions.FilterFunction;
-import org.apache.flink.api.common.functions.MapFunction;
+import com.dlink.assertion.Asserts;
+import com.dlink.cdc.AbstractSinkBuilder;
+import com.dlink.cdc.CDCBuilder;
+import com.dlink.cdc.SinkBuilder;
+import com.dlink.executor.CustomTableEnvironment;
+import com.dlink.model.FlinkCDCConfig;
+import com.dlink.model.Schema;
+import com.dlink.model.Table;
 import org.apache.flink.api.common.serialization.SimpleStringSchema;
 import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.datastream.DataStreamSource;
 import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.ProcessFunction;
 import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import com.dlink.assertion.Asserts;
-import com.dlink.cdc.AbstractSinkBuilder;
-import com.dlink.cdc.CDCBuilder;
-import com.dlink.cdc.SinkBuilder;
-import com.dlink.executor.CustomTableEnvironment;
-import com.dlink.model.FlinkCDCConfig;
-import com.dlink.model.Schema;
-import com.dlink.model.Table;
 /**
  * MysqlCDCBuilder
  *
@@ -79,44 +80,48 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
         CustomTableEnvironment customTableEnvironment,
         DataStreamSource<String> dataStreamSource) {
         if (Asserts.isNotNullString(config.getSink().get("topic"))) {
-            dataStreamSource.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
+            dataStreamSource.addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
                 config.getSink().get("topic"),
                 new SimpleStringSchema()));
         } else {
+            Map<Table, OutputTag<String>> tagMap = new HashMap<>();
+            Map<String, Table> tableMap = new HashMap<>();
+            ObjectMapper objectMapper = new ObjectMapper();
+            SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x, Map.class)).returns(Map.class);
             final List<Schema> schemaList = config.getSchemaList();
             final String schemaFieldName = config.getSchemaFieldName();
             if (Asserts.isNotNullCollection(schemaList)) {
-                SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
-                    @Override
-                    public Map map(String value) throws Exception {
-                        ObjectMapper objectMapper = new ObjectMapper();
-                        return objectMapper.readValue(value, Map.class);
-                    }
-                });
                 for (Schema schema : schemaList) {
                     for (Table table : schema.getTables()) {
-                        final String tableName = table.getName();
-                        final String schemaName = table.getSchema();
-                        SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
-                            @Override
-                            public boolean filter(Map value) throws Exception {
-                                LinkedHashMap source = (LinkedHashMap) value.get("source");
-                                return tableName.equals(source.get("table").toString())
-                                    && schemaName.equals(source.get(schemaFieldName).toString());
-                            }
-                        });
-                        SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
-                            @Override
-                            public String map(Map value) throws Exception {
-                                ObjectMapper objectMapper = new ObjectMapper();
-                                return objectMapper.writeValueAsString(value);
-                            }
-                        });
-                        stringOperator.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
-                            getSinkTableName(table),
-                            new SimpleStringSchema()));
+                        String sinkTableName = getSinkTableName(table);
+                        OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
+                        };
+                        tagMap.put(table, outputTag);
+                        tableMap.put(table.getSchemaTableName(), table);
                     }
                 }
+                SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
+                    @Override
+                    public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
+                        LinkedHashMap source = (LinkedHashMap) map.get("source");
+                        try {
+                            String result = objectMapper.writeValueAsString(map);
+                            Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
+                            OutputTag<String> outputTag = tagMap.get(table);
+                            ctx.output(outputTag, result);
+                        } catch (Exception e) {
+                            out.collect(objectMapper.writeValueAsString(map));
+                        }
+                    }
+                });
+                tagMap.forEach((k, v) -> {
+                    String topic = getSinkTableName(k);
+                    process.getSideOutput(v).rebalance().addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
+                        topic, new SimpleStringSchema())).name(topic);
+                });
             }
         }
         return dataStreamSource;
...
@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
 import org.apache.flink.table.types.logical.*;
 import org.apache.flink.types.RowKind;
 import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
             }
         });
     }
+    protected DataStream<Map> shunt(
+        SingleOutputStreamOperator<Map> processOperator,
+        Table table,
+        OutputTag<Map> tag) {
+        return processOperator.getSideOutput(tag);
+    }
     protected DataStream<RowData> buildRowData(
         SingleOutputStreamOperator<Map> filterOperator,
         List<String> columnNameList,
...
@@ -20,9 +20,16 @@
 package com.dlink.cdc.kafka;
-import org.apache.flink.api.common.functions.FilterFunction;
-import org.apache.flink.api.common.functions.MapFunction;
+import com.dlink.assertion.Asserts;
+import com.dlink.cdc.AbstractSinkBuilder;
+import com.dlink.cdc.CDCBuilder;
+import com.dlink.cdc.SinkBuilder;
+import com.dlink.executor.CustomTableEnvironment;
+import com.dlink.model.FlinkCDCConfig;
+import com.dlink.model.Schema;
+import com.dlink.model.Table;
 import org.apache.flink.api.common.serialization.SimpleStringSchema;
+import org.apache.flink.connector.base.DeliveryGuarantee;
 import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
 import org.apache.flink.connector.kafka.sink.KafkaSink;
 import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
@@ -30,22 +37,17 @@ import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.datastream.DataStreamSource;
 import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.ProcessFunction;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import com.dlink.assertion.Asserts;
-import com.dlink.cdc.AbstractSinkBuilder;
-import com.dlink.cdc.CDCBuilder;
-import com.dlink.cdc.SinkBuilder;
-import com.dlink.executor.CustomTableEnvironment;
-import com.dlink.model.FlinkCDCConfig;
-import com.dlink.model.Schema;
-import com.dlink.model.Table;
 /**
  * MysqlCDCBuilder
  *
@@ -90,54 +92,60 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
         CustomTableEnvironment customTableEnvironment,
         DataStreamSource<String> dataStreamSource) {
         if (Asserts.isNotNullString(config.getSink().get("topic"))) {
-            dataStreamSource.sinkTo(KafkaSink.<String>builder()
-                .setBootstrapServers(config.getSink().get("brokers"))
-                .setRecordSerializer(KafkaRecordSerializationSchema.builder()
-                    .setTopic(config.getSink().get("topic"))
-                    .setValueSerializationSchema(new SimpleStringSchema())
-                    .build()
-                )
-                .build());
+            KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
+                .setRecordSerializer(KafkaRecordSerializationSchema.builder()
+                    .setTopic(config.getSink().get("topic"))
+                    .setValueSerializationSchema(new SimpleStringSchema())
+                    .build()
+                )
+                .setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
+                .build();
+            dataStreamSource.sinkTo(kafkaSink);
         } else {
+            Map<Table, OutputTag<String>> tagMap = new HashMap<>();
+            Map<String, Table> tableMap = new HashMap<>();
+            ObjectMapper objectMapper = new ObjectMapper();
+            SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x, Map.class)).returns(Map.class);
             final List<Schema> schemaList = config.getSchemaList();
             final String schemaFieldName = config.getSchemaFieldName();
             if (Asserts.isNotNullCollection(schemaList)) {
-                SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
-                    @Override
-                    public Map map(String value) throws Exception {
-                        ObjectMapper objectMapper = new ObjectMapper();
-                        return objectMapper.readValue(value, Map.class);
-                    }
-                });
                 for (Schema schema : schemaList) {
                     for (Table table : schema.getTables()) {
-                        final String tableName = table.getName();
-                        final String schemaName = table.getSchema();
-                        SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
-                            @Override
-                            public boolean filter(Map value) throws Exception {
-                                LinkedHashMap source = (LinkedHashMap) value.get("source");
-                                return tableName.equals(source.get("table").toString())
-                                    && schemaName.equals(source.get(schemaFieldName).toString());
-                            }
-                        });
-                        SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
-                            @Override
-                            public String map(Map value) throws Exception {
-                                ObjectMapper objectMapper = new ObjectMapper();
-                                return objectMapper.writeValueAsString(value);
-                            }
-                        });
-                        stringOperator.sinkTo(KafkaSink.<String>builder()
-                            .setBootstrapServers(config.getSink().get("brokers"))
-                            .setRecordSerializer(KafkaRecordSerializationSchema.builder()
-                                .setTopic(getSinkTableName(table))
-                                .setValueSerializationSchema(new SimpleStringSchema())
-                                .build()
-                            )
-                            .build());
+                        String sinkTableName = getSinkTableName(table);
+                        OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
+                        };
+                        tagMap.put(table, outputTag);
+                        tableMap.put(table.getSchemaTableName(), table);
                     }
                 }
+                SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
+                    @Override
+                    public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
+                        LinkedHashMap source = (LinkedHashMap) map.get("source");
+                        try {
+                            String result = objectMapper.writeValueAsString(map);
+                            Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
+                            OutputTag<String> outputTag = tagMap.get(table);
+                            ctx.output(outputTag, result);
+                        } catch (Exception e) {
+                            out.collect(objectMapper.writeValueAsString(map));
+                        }
+                    }
+                });
+                tagMap.forEach((k, v) -> {
+                    String topic = getSinkTableName(k);
+                    KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
+                        .setRecordSerializer(KafkaRecordSerializationSchema.builder()
+                            .setTopic(topic)
+                            .setValueSerializationSchema(new SimpleStringSchema())
+                            .build()
+                        )
+                        .setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
+                        .build();
+                    process.getSideOutput(v).rebalance().sinkTo(kafkaSink).name(topic);
+                });
             }
         }
         return dataStreamSource;
...
@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
 import org.apache.flink.table.types.logical.*;
 import org.apache.flink.types.RowKind;
 import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
             }
         });
     }
+    protected DataStream<Map> shunt(
+        SingleOutputStreamOperator<Map> processOperator,
+        Table table,
+        OutputTag<Map> tag) {
+        return processOperator.getSideOutput(tag);
+    }
     protected DataStream<RowData> buildRowData(
         SingleOutputStreamOperator<Map> filterOperator,
        List<String> columnNameList,
...
@@ -20,9 +20,16 @@
 package com.dlink.cdc.kafka;
-import org.apache.flink.api.common.functions.FilterFunction;
-import org.apache.flink.api.common.functions.MapFunction;
+import com.dlink.assertion.Asserts;
+import com.dlink.cdc.AbstractSinkBuilder;
+import com.dlink.cdc.CDCBuilder;
+import com.dlink.cdc.SinkBuilder;
+import com.dlink.executor.CustomTableEnvironment;
+import com.dlink.model.FlinkCDCConfig;
+import com.dlink.model.Schema;
+import com.dlink.model.Table;
 import org.apache.flink.api.common.serialization.SimpleStringSchema;
+import org.apache.flink.connector.base.DeliveryGuarantee;
 import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
 import org.apache.flink.connector.kafka.sink.KafkaSink;
 import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
@@ -30,22 +37,17 @@ import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.datastream.DataStreamSource;
 import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.ProcessFunction;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import com.dlink.assertion.Asserts;
-import com.dlink.cdc.AbstractSinkBuilder;
-import com.dlink.cdc.CDCBuilder;
-import com.dlink.cdc.SinkBuilder;
-import com.dlink.executor.CustomTableEnvironment;
-import com.dlink.model.FlinkCDCConfig;
-import com.dlink.model.Schema;
-import com.dlink.model.Table;
 /**
  * MysqlCDCBuilder
  *
@@ -90,54 +92,60 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
         CustomTableEnvironment customTableEnvironment,
         DataStreamSource<String> dataStreamSource) {
         if (Asserts.isNotNullString(config.getSink().get("topic"))) {
-            dataStreamSource.sinkTo(KafkaSink.<String>builder()
-                .setBootstrapServers(config.getSink().get("brokers"))
-                .setRecordSerializer(KafkaRecordSerializationSchema.builder()
-                    .setTopic(config.getSink().get("topic"))
-                    .setValueSerializationSchema(new SimpleStringSchema())
-                    .build()
-                )
-                .build());
+            KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
+                .setRecordSerializer(KafkaRecordSerializationSchema.builder()
+                    .setTopic(config.getSink().get("topic"))
+                    .setValueSerializationSchema(new SimpleStringSchema())
+                    .build()
+                )
+                .setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
+                .build();
+            dataStreamSource.sinkTo(kafkaSink);
         } else {
+            Map<Table, OutputTag<String>> tagMap = new HashMap<>();
+            Map<String, Table> tableMap = new HashMap<>();
+            ObjectMapper objectMapper = new ObjectMapper();
+            SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x, Map.class)).returns(Map.class);
             final List<Schema> schemaList = config.getSchemaList();
             final String schemaFieldName = config.getSchemaFieldName();
             if (Asserts.isNotNullCollection(schemaList)) {
-                SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
-                    @Override
-                    public Map map(String value) throws Exception {
-                        ObjectMapper objectMapper = new ObjectMapper();
-                        return objectMapper.readValue(value, Map.class);
-                    }
-                });
                 for (Schema schema : schemaList) {
                     for (Table table : schema.getTables()) {
-                        final String tableName = table.getName();
-                        final String schemaName = table.getSchema();
-                        SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
-                            @Override
-                            public boolean filter(Map value) throws Exception {
-                                LinkedHashMap source = (LinkedHashMap) value.get("source");
-                                return tableName.equals(source.get("table").toString())
-                                    && schemaName.equals(source.get(schemaFieldName).toString());
-                            }
-                        });
-                        SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
-                            @Override
-                            public String map(Map value) throws Exception {
-                                ObjectMapper objectMapper = new ObjectMapper();
-                                return objectMapper.writeValueAsString(value);
-                            }
-                        });
-                        stringOperator.sinkTo(KafkaSink.<String>builder()
-                            .setBootstrapServers(config.getSink().get("brokers"))
-                            .setRecordSerializer(KafkaRecordSerializationSchema.builder()
-                                .setTopic(getSinkTableName(table))
-                                .setValueSerializationSchema(new SimpleStringSchema())
-                                .build()
-                            )
-                            .build());
+                        String sinkTableName = getSinkTableName(table);
+                        OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
+                        };
+                        tagMap.put(table, outputTag);
+                        tableMap.put(table.getSchemaTableName(), table);
                     }
                 }
+                SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
+                    @Override
+                    public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
+                        LinkedHashMap source = (LinkedHashMap) map.get("source");
+                        try {
+                            String result = objectMapper.writeValueAsString(map);
+                            Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
+                            OutputTag<String> outputTag = tagMap.get(table);
+                            ctx.output(outputTag, result);
+                        } catch (Exception e) {
+                            out.collect(objectMapper.writeValueAsString(map));
+                        }
+                    }
+                });
+                tagMap.forEach((k, v) -> {
+                    String topic = getSinkTableName(k);
+                    KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
+                        .setRecordSerializer(KafkaRecordSerializationSchema.builder()
+                            .setTopic(topic)
+                            .setValueSerializationSchema(new SimpleStringSchema())
+                            .build()
+                        )
+                        .setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
+                        .build();
+                    process.getSideOutput(v).rebalance().sinkTo(kafkaSink).name(topic);
+                });
             }
         }
         return dataStreamSource;
...