Commit beeb8a51 authored by lcg's avatar lcg

任务监测

parent aeef7d1e
......@@ -125,6 +125,10 @@
<groupId>com.dlink</groupId>
<artifactId>dlink-alert-base</artifactId>
</dependency>
<dependency>
<groupId>com.dlink</groupId>
<artifactId>dlink-daemon</artifactId>
</dependency>
<dependency>
<groupId>com.dlink</groupId>
<artifactId>dlink-executor</artifactId>
......
package com.dlink.init;
import com.dlink.daemon.task.DaemonFactory;
import com.dlink.daemon.task.DaemonTaskConfig;
import com.dlink.job.FlinkJobTask;
import com.dlink.model.JobInstance;
import com.dlink.service.JobInstanceService;
import com.dlink.service.SysConfigService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.List;
/**
* SystemInit
*
......@@ -17,12 +27,22 @@ import org.springframework.stereotype.Component;
@Order(value = 1)
public class SystemInit implements ApplicationRunner {

    private static final Logger log = LoggerFactory.getLogger(SystemInit.class);

    @Autowired
    private SysConfigService sysConfigService;

    @Autowired
    private JobInstanceService jobInstanceService;

    /**
     * Runs once at application startup: initializes the system configuration,
     * then resumes daemon monitoring for every job instance that is still active.
     */
    @Override
    public void run(ApplicationArguments args) throws Exception {
        sysConfigService.initSysConfig();
        List<JobInstance> activeInstances = jobInstanceService.listJobInstanceActive();
        List<DaemonTaskConfig> daemonConfigs = new ArrayList<>(activeInstances.size());
        for (JobInstance instance : activeInstances) {
            daemonConfigs.add(new DaemonTaskConfig(FlinkJobTask.TYPE, instance.getId()));
        }
        log.info("启动的任务数量:" + daemonConfigs.size());
        DaemonFactory.start(daemonConfigs);
    }
}
package com.dlink.job;
import com.dlink.context.SpringContextUtils;
import com.dlink.daemon.constant.FlinkTaskConstant;
import com.dlink.daemon.pool.DefaultThreadPool;
import com.dlink.daemon.task.DaemonTask;
import com.dlink.daemon.task.DaemonTaskConfig;
import com.dlink.model.JobInstance;
import com.dlink.model.JobStatus;
import com.dlink.service.TaskService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.DependsOn;
/**
 * Daemon task that polls one Flink job instance until it reaches a terminal status.
 * Re-enqueues itself on the default thread pool while the job is still running.
 */
@DependsOn("springContextUtils")
public class FlinkJobTask implements DaemonTask {

    private static final Logger log = LoggerFactory.getLogger(FlinkJobTask.class);

    /** Type key used to match this task in the DaemonTask SPI lookup. */
    public static final String TYPE = "jobInstance";

    private static final TaskService taskService;

    static {
        // Fetched manually because DaemonTask instances are created via SPI,
        // not managed by Spring (hence the @DependsOn on springContextUtils).
        taskService = SpringContextUtils.getBean("taskServiceImpl", TaskService.class);
    }

    private DaemonTaskConfig config;

    // Timestamp (ms) of the previous poll; 0 before the first run, so no initial sleep.
    private long preDealTime;

    @Override
    public DaemonTask setConfig(DaemonTaskConfig config) {
        this.config = config;
        return this;
    }

    @Override
    public String getType() {
        return TYPE;
    }

    /**
     * Polls the job instance once. Throttles to at most one refresh per
     * {@link FlinkTaskConstant#TIME_SLEEP} ms, then re-queues itself if the
     * job has not finished.
     */
    @Override
    public void dealTask() {
        long gap = System.currentTimeMillis() - this.preDealTime;
        if (gap < FlinkTaskConstant.TIME_SLEEP) {
            try {
                Thread.sleep(FlinkTaskConstant.TIME_SLEEP);
            } catch (InterruptedException e) {
                // Restore the interrupt status so the worker thread can observe it.
                Thread.currentThread().interrupt();
            }
        }
        preDealTime = System.currentTimeMillis();
        // NOTE(review): assumes refreshJobInstance never returns null — confirm,
        // otherwise the calls below would NPE.
        JobInstance jobInstance = taskService.refreshJobInstance(config.getId());
        log.info("监控任务:{}", jobInstance.getId());
        if (!JobStatus.isDone(jobInstance.getStatus())) {
            DefaultThreadPool.getInstance().execute(this);
        }
    }
}
......@@ -2,15 +2,14 @@ package com.dlink.job;
import com.dlink.assertion.Asserts;
import com.dlink.context.SpringContextUtils;
import com.dlink.daemon.task.DaemonFactory;
import com.dlink.daemon.task.DaemonTaskConfig;
import com.dlink.model.*;
import com.dlink.service.*;
import com.dlink.utils.JSONUtil;
import org.apache.commons.lang3.StringUtils;
import org.springframework.context.annotation.DependsOn;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
/**
* Job2MysqlHandler
......@@ -95,17 +94,17 @@ public class Job2MysqlHandler implements JobHandler {
if (Asserts.isNotNull(cluster)) {
clusterId = cluster.getId();
}
}else{
} else {
cluster = clusterService.getById(clusterId);
}
history.setClusterId(clusterId);
historyService.updateById(history);
ClusterConfiguration clusterConfiguration = null;
if(Asserts.isNotNull(job.getJobConfig().getClusterConfigurationId())){
if (Asserts.isNotNull(job.getJobConfig().getClusterConfigurationId())) {
clusterConfiguration = clusterConfigurationService.getClusterConfigById(job.getJobConfig().getClusterConfigurationId());
}
Jar jar = null;
if(Asserts.isNotNull(job.getJobConfig().getJarId())){
if (Asserts.isNotNull(job.getJobConfig().getJarId())) {
jar = jarService.getById(job.getJobConfig().getJarId());
}
if (Asserts.isNotNullCollection(job.getJids())) {
......@@ -128,6 +127,7 @@ public class Job2MysqlHandler implements JobHandler {
jobHistory.setClusterJson(JSONUtil.toJsonString(cluster));
jobHistory.setClusterConfigurationJson(JSONUtil.toJsonString(clusterConfiguration));
jobHistoryService.save(jobHistory);
DaemonFactory.addTask(DaemonTaskConfig.build(FlinkJobTask.TYPE, jobInstance.getId()));
break;
}
}
......
......@@ -17,4 +17,6 @@ import java.util.List;
public interface JobInstanceMapper extends SuperMapper<JobInstance> {

    // Number of job instances grouped by status.
    List<JobInstanceCount> countStatus();

    // Instances whose status is not terminal; defined by the
    // listJobInstanceActive select in the mapper XML.
    List<JobInstance> listJobInstanceActive();
}
......@@ -5,6 +5,8 @@ import com.dlink.model.JobInfoDetail;
import com.dlink.model.JobInstance;
import com.dlink.model.JobInstanceStatus;
import java.util.List;
/**
* JobInstanceService
*
......@@ -15,6 +17,8 @@ public interface JobInstanceService extends ISuperService<JobInstance> {
JobInstanceStatus getStatusCount();
List<JobInstance> listJobInstanceActive();
JobInfoDetail getJobInfoDetail(Integer id);
JobInfoDetail getJobInfoDetailInfo(JobInstance jobInstance);
......
......@@ -65,23 +65,27 @@ public class JobHistoryServiceImpl extends SuperServiceImpl<JobHistoryMapper, Jo
@Override
public JobHistory refreshJobHistory(Integer id, String jobManagerHost, String jobId) {
JsonNode jobInfo = FlinkAPI.build(jobManagerHost).getJobInfo(jobId);
JsonNode exception = FlinkAPI.build(jobManagerHost).getException(jobId);
JsonNode checkPoints = FlinkAPI.build(jobManagerHost).getCheckPoints(jobId);
JsonNode checkPointsConfig = FlinkAPI.build(jobManagerHost).getCheckPointsConfig(jobId);
JsonNode jobsConfig = FlinkAPI.build(jobManagerHost).getJobsConfig(jobId);
JobHistory jobHistory = new JobHistory();
jobHistory.setId(id);
jobHistory.setJobJson(JSONUtil.toJsonString(jobInfo));
jobHistory.setExceptionsJson(JSONUtil.toJsonString(exception));
jobHistory.setCheckpointsJson(JSONUtil.toJsonString(checkPoints));
jobHistory.setCheckpointsConfigJson(JSONUtil.toJsonString(checkPointsConfig));
jobHistory.setConfigJson(JSONUtil.toJsonString(jobsConfig));
if (Asserts.isNotNull(getById(id))) {
updateById(jobHistory);
} else {
save(jobHistory);
try {
JsonNode jobInfo = FlinkAPI.build(jobManagerHost).getJobInfo(jobId);
JsonNode exception = FlinkAPI.build(jobManagerHost).getException(jobId);
JsonNode checkPoints = FlinkAPI.build(jobManagerHost).getCheckPoints(jobId);
JsonNode checkPointsConfig = FlinkAPI.build(jobManagerHost).getCheckPointsConfig(jobId);
JsonNode jobsConfig = FlinkAPI.build(jobManagerHost).getJobsConfig(jobId);
jobHistory.setJobJson(JSONUtil.toJsonString(jobInfo));
jobHistory.setExceptionsJson(JSONUtil.toJsonString(exception));
jobHistory.setCheckpointsJson(JSONUtil.toJsonString(checkPoints));
jobHistory.setCheckpointsConfigJson(JSONUtil.toJsonString(checkPointsConfig));
jobHistory.setConfigJson(JSONUtil.toJsonString(jobsConfig));
if (Asserts.isNotNull(getById(id))) {
updateById(jobHistory);
} else {
save(jobHistory);
}
}catch (Exception e){
}finally {
return jobHistory;
}
return jobHistory;
}
}
package com.dlink.service.impl;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.dlink.assertion.Asserts;
import com.dlink.constant.FlinkRestResultConstant;
import com.dlink.db.service.impl.SuperServiceImpl;
......@@ -92,6 +93,11 @@ public class JobInstanceServiceImpl extends SuperServiceImpl<JobInstanceMapper,
return jobInstanceStatus;
}
// Active = status not in FAILED/CANCELED/FINISHED/UNKNOWN; the filter lives
// in the mapper XML (listJobInstanceActive select), newest first.
@Override
public List<JobInstance> listJobInstanceActive() {
    return baseMapper.listJobInstanceActive();
}
@Override
public JobInfoDetail getJobInfoDetail(Integer id) {
return getJobInfoDetailInfo(getById(id));
......
......@@ -424,7 +424,7 @@ public class TaskServiceImpl extends SuperServiceImpl<TaskMapper, Task> implemen
Cluster cluster = clusterService.getById(jobInstance.getClusterId());
JobHistory jobHistoryJson = jobHistoryService.refreshJobHistory(id, cluster.getJobManagerHost(), jobInstance.getJid());
JobHistory jobHistory = jobHistoryService.getJobHistoryInfo(jobHistoryJson);
if(jobHistory.getJob().has(FlinkRestResultConstant.ERRORS)){
if(Asserts.isNull(jobHistory.getJob())||jobHistory.getJob().has(FlinkRestResultConstant.ERRORS)){
jobInstance.setStatus(JobStatus.UNKNOWN.getValue());
}else{
jobInstance.setDuration(jobHistory.getJob().get(FlinkRestResultConstant.JOB_DURATION).asLong()/1000);
......@@ -443,6 +443,9 @@ public class TaskServiceImpl extends SuperServiceImpl<TaskMapper, Task> implemen
}
private void handleJobDone(JobInstance jobInstance){
if(Asserts.isNull(jobInstance.getTaskId())){
return;
}
Task task = new Task();
task.setId(jobInstance.getTaskId());
task.setJobInstanceId(0);
......
......@@ -49,4 +49,13 @@
dlink_job_instance
group by status
</select>
<!-- Job instances still worth monitoring: any status outside the terminal set
     (FAILED / CANCELED / FINISHED / UNKNOWN), newest first. -->
<select id="listJobInstanceActive" resultType="com.dlink.model.JobInstance">
    select
    *
    from
    dlink_job_instance
    where status not in ('FAILED','CANCELED','FINISHED','UNKNOWN')
    order by id desc
</select>
</mapper>
......@@ -9,7 +9,7 @@
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dlink-deamon</artifactId>
<artifactId>dlink-daemon</artifactId>
<properties>
<maven.compiler.source>8</maven.compiler.source>
......
package com.dlink.daemon.constant;
public interface FlinkTaskConstant {

    /**
     * Minimum pause (ms) between two polls of the same task
     * (used by FlinkJobTask.dealTask to throttle refreshes).
     */
    int TIME_SLEEP = 1000;

    /**
     * Upper bound (ms) for the daemon monitor thread's polling sleep;
     * the actual sleep shrinks as the task queue grows (see DaemonFactory.start).
     */
    int MAX_POLLING_GAP = 1000;

    /**
     * Lower bound (ms) for the daemon monitor thread's polling sleep.
     */
    int MIN_POLLING_GAP = 50;
}
package com.dlink.daemon.entity;
import javafx.concurrent.Task;
import java.util.LinkedList;
/**
 * Unbounded blocking FIFO queue guarded by a single monitor lock.
 * dequeue() blocks until an element is available.
 *
 * @param <T> element type
 */
public class TaskQueue<T> {

    private final LinkedList<T> tasks = new LinkedList<>();
    private final Object lock = new Object();

    /** Appends a task and wakes up any thread blocked in {@link #dequeue()}. */
    public void enqueue(T task) {
        synchronized (lock) {
            tasks.addLast(task);
            lock.notifyAll();
        }
    }

    /**
     * Removes and returns the head of the queue, blocking while it is empty.
     * Interruption does not abort the wait (callers rely on a non-null result);
     * instead the interrupt status is re-asserted before returning so the
     * caller can observe it.
     */
    public T dequeue() {
        synchronized (lock) {
            boolean interrupted = false;
            try {
                while (tasks.isEmpty()) {
                    try {
                        lock.wait();
                    } catch (InterruptedException e) {
                        // Can't restore the flag here: wait() would then throw
                        // immediately and busy-spin. Remember and re-assert later.
                        interrupted = true;
                    }
                }
                return tasks.removeFirst();
            } finally {
                if (interrupted) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /** Current number of queued tasks. */
    public int getTaskSize() {
        synchronized (lock) {
            return tasks.size();
        }
    }
}
package com.dlink.daemon.entity;
import com.dlink.daemon.task.DaemonTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Worker loop that repeatedly takes a DaemonTask from the shared queue and
 * executes it. One failing task never kills the worker thread.
 */
public class TaskWorker implements Runnable {

    private static final Logger log = LoggerFactory.getLogger(TaskWorker.class);

    // volatile: shutdown() is invoked from a different thread than run().
    private volatile boolean running = true;

    private final TaskQueue<DaemonTask> queue;

    public TaskWorker(TaskQueue<DaemonTask> queue) {
        this.queue = queue;
    }

    @Override
    public void run() {
        log.info("TaskWorker run");
        while (running) {
            DaemonTask daemonTask = queue.dequeue();
            if (daemonTask != null) {
                try {
                    daemonTask.dealTask();
                } catch (Exception e) {
                    // Log with the cause instead of printStackTrace; keep the worker alive.
                    log.error("TaskWorker deal task error", e);
                }
            }
        }
    }

    /**
     * Asks the worker to stop after its current task.
     * NOTE(review): a worker blocked inside queue.dequeue() will not observe
     * the flag until another task arrives — confirm this is acceptable.
     */
    public void shutdown() {
        log.info(Thread.currentThread().getName() + "TaskWorker shutdown");
        running = false;
    }
}
package com.dlink.daemon.exception;
/**
 * Unchecked exception raised when a daemon task cannot be built or handled
 * (e.g. no SPI implementation matches the configured type).
 */
public class DaemonTaskException extends RuntimeException {

    public DaemonTaskException(String message, Throwable cause) {
        super(message, cause);
    }

    public DaemonTaskException(String message) {
        super(message);
    }
}
package com.dlink.daemon.pool;
import com.dlink.daemon.entity.TaskQueue;
import com.dlink.daemon.entity.TaskWorker;
import com.dlink.daemon.task.DaemonTask;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Default singleton implementation of {@link ThreadPool}, backed by a
 * blocking TaskQueue and a dynamically sized set of TaskWorker threads.
 *
 * @author lcg
 * @date 2022/3/7 10:36
 */
public class DefaultThreadPool implements ThreadPool {

    private static final int MAX_WORKER_NUM = 10;
    private static final int DEFAULT_WORKER_NUM = 5;
    private static final int MIN_WORKER_NUM = 1;

    private final List<TaskWorker> workers = Collections.synchronizedList(new ArrayList<>());
    private final Object lock = new Object();

    // AtomicInteger is already thread-safe; the reference itself is final.
    private final AtomicInteger workerNum = new AtomicInteger(0);

    private final TaskQueue<DaemonTask> queue = new TaskQueue<>();

    // volatile is required for double-checked locking: without it another
    // thread may observe a partially constructed instance.
    private static volatile DefaultThreadPool defaultThreadPool;

    private DefaultThreadPool() {
        addWorkers(DEFAULT_WORKER_NUM);
    }

    /** Lazily creates the singleton (double-checked locking). */
    public static DefaultThreadPool getInstance() {
        if (defaultThreadPool == null) {
            synchronized (DefaultThreadPool.class) {
                if (defaultThreadPool == null) {
                    defaultThreadPool = new DefaultThreadPool();
                }
            }
        }
        return defaultThreadPool;
    }

    /** Enqueues a task; null tasks are ignored. */
    @Override
    public void execute(DaemonTask daemonTask) {
        if (daemonTask != null) {
            queue.enqueue(daemonTask);
        }
    }

    /** Spawns up to {@code num} new workers, capped at MAX_WORKER_NUM total. */
    @Override
    public void addWorkers(int num) {
        synchronized (lock) {
            if (num + this.workerNum.get() > MAX_WORKER_NUM) {
                num = MAX_WORKER_NUM - this.workerNum.get();
                if (num <= 0) {
                    return;
                }
            }
            for (int i = 0; i < num; i++) {
                TaskWorker worker = new TaskWorker(queue);
                workers.add(worker);
                Thread thread = new Thread(worker, "ThreadPool-Worker-" + workerNum.incrementAndGet());
                thread.start();
            }
        }
    }

    /** Stops up to {@code num} workers, always keeping at least MIN_WORKER_NUM. */
    @Override
    public void removeWorker(int num) {
        synchronized (lock) {
            if (num >= this.workerNum.get()) {
                num = this.workerNum.get() - MIN_WORKER_NUM;
                if (num <= 0) {
                    return;
                }
            }
            int count = num - 1;
            while (count >= 0) {
                TaskWorker worker = workers.get(count);
                if (workers.remove(worker)) {
                    worker.shutdown();
                    count--;
                }
            }
            // Decrease the worker count.
            workerNum.getAndAdd(-num);
        }
    }

    /**
     * Signals every worker to stop and clears the worker list.
     * NOTE(review): workerNum is not reset here; after shutdown, getWorkCount()
     * still reports the old value — confirm shutdown is meant to be terminal.
     */
    @Override
    public void shutdown() {
        synchronized (lock) {
            for (TaskWorker worker : workers) {
                worker.shutdown();
            }
            workers.clear();
        }
    }

    /** Number of tasks currently queued. */
    @Override
    public int getTaskSize() {
        return queue.getTaskSize();
    }

    /** Number of live workers. */
    public int getWorkCount() {
        synchronized (lock) {
            return this.workerNum.get();
        }
    }
}
package com.dlink.daemon.pool;
import com.dlink.daemon.task.DaemonTask;
/**
 * Minimal worker-pool abstraction for executing {@link DaemonTask}s.
 *
 * @author lcg
 * @date 2022/3/7 10:36
 */
public interface ThreadPool {
    // Submit a task for asynchronous execution.
    void execute(DaemonTask daemonTask);
    // Stop all worker threads in the pool.
    void shutdown();
    // Increase the number of worker threads.
    void addWorkers(int num);
    // Decrease the number of worker threads.
    void removeWorker(int num);
    // Number of tasks currently waiting in the queue.
    int getTaskSize();
}
package com.dlink.daemon.task;
import com.dlink.daemon.constant.FlinkTaskConstant;
import com.dlink.daemon.pool.DefaultThreadPool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
/**
 * Bootstraps the daemon thread pool and runs a background monitor thread that
 * scales the worker count with the size of the task queue.
 */
public class DaemonFactory {

    private static final Logger log = LoggerFactory.getLogger(DaemonFactory.class);

    /**
     * Seeds the pool with the given task configs and starts the monitor loop.
     * The sleep between scaling rounds shrinks as the queue grows, bounded by
     * MIN_POLLING_GAP and MAX_POLLING_GAP.
     */
    public static void start(List<DaemonTaskConfig> configList) {
        Thread thread = new Thread(() -> {
            DefaultThreadPool pool = DefaultThreadPool.getInstance();
            for (DaemonTaskConfig config : configList) {
                pool.execute(DaemonTask.build(config));
            }
            while (true) {
                int taskSize = pool.getTaskSize();
                try {
                    Thread.sleep(Math.max(FlinkTaskConstant.MAX_POLLING_GAP / (taskSize + 1), FlinkTaskConstant.MIN_POLLING_GAP));
                } catch (InterruptedException e) {
                    // Restore the interrupt status and stop monitoring instead of
                    // silently continuing the infinite loop.
                    Thread.currentThread().interrupt();
                    return;
                }
                // Target roughly one worker per 100 queued tasks, minimum one.
                int num = taskSize / 100 + 1;
                if (pool.getWorkCount() < num) {
                    pool.addWorkers(num - pool.getWorkCount());
                } else if (pool.getWorkCount() > num) {
                    pool.removeWorker(pool.getWorkCount() - num);
                }
                log.info(" >>> taskSize:{} workCount: {}", taskSize, pool.getWorkCount());
            }
        }, "DaemonFactory-monitor");
        thread.start();
    }

    /** Builds and enqueues a single daemon task on the shared pool. */
    public static void addTask(DaemonTaskConfig config) {
        DefaultThreadPool.getInstance().execute(DaemonTask.build(config));
    }
}
package com.dlink.daemon.task;
import com.dlink.assertion.Asserts;
import com.dlink.daemon.exception.DaemonTaskException;

import sun.misc.Service;

import java.util.Iterator;
import java.util.Optional;
import java.util.ServiceLoader;
/**
 * SPI contract for daemon tasks. Implementations are discovered via
 * ServiceLoader and selected by their type key.
 */
public interface DaemonTask {

    /**
     * Finds an implementation able to handle the config's type.
     *
     * @param config non-null task config
     * @return the configured task, or empty if no provider matches
     */
    static Optional<DaemonTask> get(DaemonTaskConfig config) {
        Asserts.checkNotNull(config, "线程任务配置不能为空");
        // java.util.ServiceLoader replaces the internal sun.misc.Service API,
        // which is unsupported and was removed from the JDK in Java 9.
        for (DaemonTask daemonTask : ServiceLoader.load(DaemonTask.class)) {
            if (daemonTask.canHandle(config.getType())) {
                return Optional.of(daemonTask.setConfig(config));
            }
        }
        return Optional.empty();
    }

    /**
     * Like {@link #get(DaemonTaskConfig)} but throws when no provider matches.
     *
     * @throws DaemonTaskException if the type is not supported
     */
    static DaemonTask build(DaemonTaskConfig config) {
        Optional<DaemonTask> optionalTask = DaemonTask.get(config);
        if (!optionalTask.isPresent()) {
            throw new DaemonTaskException("不支持线程任务类型【" + config.getType() + "】");
        }
        return optionalTask.get();
    }

    /** Injects the config; returns {@code this} for chaining. */
    DaemonTask setConfig(DaemonTaskConfig config);

    /** True when this implementation handles the given type key (case-insensitive). */
    default boolean canHandle(String type) {
        return Asserts.isEqualsIgnoreCase(getType(), type);
    }

    String getType();

    /** Performs one unit of work for this task. */
    void dealTask();
}
package com.dlink.daemon.task;
/**
 * Mutable value holder describing a daemon task: its registered type key and
 * the id of the entity it watches.
 */
public class DaemonTaskConfig {

    private String type;
    private Integer id;

    public DaemonTaskConfig() {
    }

    public DaemonTaskConfig(String type, Integer id) {
        this.type = type;
        this.id = id;
    }

    /** Static factory, equivalent to {@code new DaemonTaskConfig(type, id)}. */
    public static DaemonTaskConfig build(String type, Integer id) {
        return new DaemonTaskConfig(type, id);
    }

    public String getType() {
        return type;
    }

    public Integer getId() {
        return id;
    }

    public void setType(String type) {
        this.type = type;
    }

    public void setId(Integer id) {
        this.id = id;
    }
}
package com.dlink.daemon.task;
import com.dlink.daemon.constant.FlinkTaskConstant;
import com.dlink.model.JobStatus;
import lombok.Data;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Random;
// NOTE(review): appears to be a demo/sample task — the @Override annotations are
// commented out and the class does not implement DaemonTask; confirm whether it
// is still needed or can be removed.
@Data
public class FlinkDaemonTask {

    private static final Logger log = LoggerFactory.getLogger(FlinkDaemonTask.class);

    // Fixed seed: the finished/running branch below is pseudo-random but repeatable.
    public static Random random = new Random(5);

    private String id;
    private JobStatus status;
    // Timestamp (ms) of the previous dealTask() call; 0 until the first run.
    private long preDealTime;
    // Number of times dealTask() has executed.
    private int count;

    // @Override
    public DaemonTask setConfig(DaemonTaskConfig config) {
        return null;
    }

    // @Override
    public String getType() {
        return null;
    }

    // Simulates one monitoring round: logs a random finished/running outcome,
    // then throttles to at most one round per TIME_SLEEP ms.
    // @Override
    public void dealTask() {
        long gap = 0;
        if (this.preDealTime != 0L) {
            gap = System.currentTimeMillis() - this.preDealTime;
        }
        preDealTime = System.currentTimeMillis();
        int i = random.nextInt(10);
        if(i > 5){
            log.info("deal FlinkTask id:" + id + " status: finished count:"+ count + " gap:"+ gap + "ms");
        }else {
            log.info("deal FlinkTask id:" + id + " status: running count:" +count + " gap:"+ gap + "ms");
            // Re-queue so it gets checked again on the next round.
            // DefaultThreadPool.getInstance().execute(this);
        }
        count++;
        if(gap < FlinkTaskConstant.TIME_SLEEP){
            try {
                Thread.sleep(FlinkTaskConstant.TIME_SLEEP);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

    public FlinkDaemonTask() {
    }

    public FlinkDaemonTask(String id) {
        this.id = id;
    }

    public FlinkDaemonTask(String id, JobStatus status) {
        this.id = id;
        this.status = status;
    }
}
package com.dlink.deamon;
package com.dlink.daemon;
/**
* DeamonTest
......@@ -6,5 +6,5 @@ package com.dlink.deamon;
* @author wenmo
* @since 2022/3/2 23:31
*/
public class DeamonTest {
public class DaemonTest {
}
package com.dlink.deamon;
/**
* Deamon
*
* @author wenmo
* @since 2022/3/2 23:31
*/
public interface Deamon {
    // NOTE(review): empty marker interface with the legacy misspelled name
    // ("Deamon"); the module itself was renamed to dlink-daemon in this commit —
    // confirm whether this type is still referenced anywhere.
}
......@@ -24,7 +24,7 @@
<module>dlink-admin</module>
<module>dlink-assembly</module>
<module>dlink-alert</module>
<module>dlink-deamon</module>
<module>dlink-daemon</module>
</modules>
......@@ -269,7 +269,7 @@
</dependency>
<dependency>
<groupId>com.dlink</groupId>
<artifactId>dlink-deamon</artifactId>
<artifactId>dlink-daemon</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment