Commit df3c6187 authored by wenmo

实时监控容错机制

parent b7d9b0dc
......@@ -4,7 +4,6 @@ import com.dlink.common.result.ProTableResult;
import com.dlink.common.result.Result;
import com.dlink.job.JobResult;
import com.dlink.model.Task;
import com.dlink.result.SubmitResult;
import com.dlink.service.TaskService;
import com.fasterxml.jackson.databind.JsonNode;
import lombok.extern.slf4j.Slf4j;
......@@ -174,8 +173,12 @@ public class TaskController {
* 重启任务
*/
@GetMapping(value = "/restartTask")
public Result restartTask(@RequestParam Integer id) {
return Result.succeed(taskService.restartTask(id), "操作成功");
public Result restartTask(@RequestParam Integer id, @RequestParam Boolean isOnLine) {
if (isOnLine) {
return taskService.reOnLineTask(id);
} else {
return Result.succeed(taskService.restartTask(id), "重启成功");
}
}
}
package com.dlink.job;
import com.dlink.assertion.Asserts;
import com.dlink.context.SpringContextUtils;
import com.dlink.daemon.constant.FlinkTaskConstant;
import com.dlink.daemon.pool.DefaultThreadPool;
......@@ -12,6 +13,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.DependsOn;
import java.time.Duration;
import java.time.LocalDateTime;
@DependsOn("springContextUtils")
public class FlinkJobTask implements DaemonTask {
......@@ -48,8 +52,9 @@ public class FlinkJobTask implements DaemonTask {
}
}
preDealTime = System.currentTimeMillis();
JobInstance jobInstance = taskService.refreshJobInstance(config.getId());
if (!JobStatus.isDone(jobInstance.getStatus())) {
JobInstance jobInstance = taskService.refreshJobInstance(config.getId(), false);
if ((!JobStatus.isDone(jobInstance.getStatus())) || (Asserts.isNotNull(jobInstance.getFinishTime())
&& Duration.between(jobInstance.getFinishTime(), LocalDateTime.now()).toMinutes() < 1)) {
DefaultThreadPool.getInstance().execute(this);
}
}
......
......@@ -21,4 +21,7 @@ public interface JobInstanceMapper extends SuperMapper<JobInstance> {
List<JobInstanceCount> countHistoryStatus();
List<JobInstance> listJobInstanceActive();
JobInstance getJobInstanceByTaskId(Integer id);
}
......@@ -25,4 +25,6 @@ public interface JobInstanceService extends ISuperService<JobInstance> {
JobInfoDetail getJobInfoDetailInfo(JobInstance jobInstance);
LineageResult getLineage(Integer id);
JobInstance getJobInstanceByTaskId(Integer id);
}
......@@ -43,6 +43,8 @@ public interface TaskService extends ISuperService<Task> {
Result onLineTask(Integer id);
Result reOnLineTask(Integer id);
Result offLineTask(Integer id, String type);
Result cancelTask(Integer id);
......@@ -51,7 +53,7 @@ public interface TaskService extends ISuperService<Task> {
boolean savepointTask(Integer taskId, String savePointType);
JobInstance refreshJobInstance(Integer id);
JobInstance refreshJobInstance(Integer id, boolean isCoercive);
JobInfoDetail refreshJobInfoDetail(Integer id);
}
......@@ -117,4 +117,9 @@ public class JobInstanceServiceImpl extends SuperServiceImpl<JobInstanceMapper,
return LineageBuilder.getLineage(getJobInfoDetail(id).getHistory().getStatement());
}
/**
 * Looks up a job instance for a task by delegating to the mapper.
 *
 * @param id value forwarded unchanged to {@code baseMapper.getJobInstanceByTaskId}
 * @return whatever the mapper returns for that id
 */
@Override
public JobInstance getJobInstanceByTaskId(Integer id) {
    final JobInstance instanceForTask = baseMapper.getJobInstanceByTaskId(id);
    return instanceForTask;
}
}
......@@ -51,33 +51,35 @@
</select>
<select id="countStatus" resultType="com.dlink.model.JobInstanceCount">
select
a.status,
select a.status,
count(1) as counts
from
dlink_job_instance a
from dlink_job_instance a
inner join (
select max(ji.id) as id from dlink_job_instance ji
select max(ji.id) as id
from dlink_job_instance ji
group by ji.task_id
) snap on snap.id = a.id
group by status
</select>
<select id="countHistoryStatus" resultType="com.dlink.model.JobInstanceCount">
select
status,
select status,
count(1) as counts
from
dlink_job_instance
from dlink_job_instance
group by status
</select>
<select id="listJobInstanceActive" resultType="com.dlink.model.JobInstance">
select
*
from
dlink_job_instance
where status not in ('FAILED','CANCELED','FINISHED','UNKNOWN')
select *
from dlink_job_instance
where status not in ('FAILED', 'CANCELED', 'FINISHED', 'UNKNOWN')
order by id desc
</select>
<!-- Fetches a single dlink_job_instance row for the given task_id: rows are ordered by id descending and limited to one, so the row with the highest id for that task is returned. -->
<select id="getJobInstanceByTaskId" resultType="com.dlink.model.JobInstance">
select *
from dlink_job_instance
where task_id = #{id}
order by id desc limit 1
</select>
</mapper>
......@@ -64,6 +64,9 @@ const JobStatus = (props: JobStatusFormProps) => {
</Tag>) : (status === 'RESTARTING') ?
(<Tag icon={<ClockCircleOutlined/>} color="default">
RESTARTING
</Tag>) : (status === 'CREATED') ?
(<Tag icon={<ClockCircleOutlined/>} color="default">
CREATED
</Tag>) :
(<Tag icon={<QuestionCircleOutlined />} color="default">
UNKNOWEN
......
......@@ -15,7 +15,7 @@ import Config from "@/pages/DevOps/JobInfo/Config";
import JobStatus, {isStatusDone} from "@/components/Common/JobStatus";
import {cancelJob, offLineTask, restartJob} from "@/components/Studio/StudioEvent/DDL";
import {CODE} from "@/components/Common/crud";
import JobLifeCycle from "@/components/Common/JobLifeCycle";
import JobLifeCycle, {JOB_LIFE_CYCLE} from "@/components/Common/JobLifeCycle";
import Exception from "@/pages/DevOps/JobInfo/Exception";
import FlinkSQL from "@/pages/DevOps/JobInfo/FlinkSQL";
import Alert from "@/pages/DevOps/JobInfo/Alert";
......@@ -61,7 +61,7 @@ const JobInfo = (props: any) => {
};
const handleSavepoint = (key: string) => {
if(key=='canceljob'){
if (key == 'canceljob') {
Modal.confirm({
title: '停止任务',
content: `确定只停止该作业,不进行 SavePoint 操作吗?`,
......@@ -72,10 +72,10 @@ const JobInfo = (props: any) => {
const res = cancelJob(job?.cluster?.id, job?.instance?.jid);
res.then((result) => {
if (result.code == CODE.SUCCESS) {
message.success(key+"成功");
message.success(key + "成功");
handleGetJobInfoDetail();
} else {
message.error(key+"失败");
message.error(key + "失败");
}
});
}
......@@ -83,19 +83,19 @@ const JobInfo = (props: any) => {
return;
}
Modal.confirm({
title: key+'任务',
title: key + '任务',
content: `确定${key}该作业吗?`,
okText: '确认',
cancelText: '取消',
onOk: async () => {
if (!job?.cluster?.id) return;
const res = offLineTask(job?.instance?.taskId,key);
const res = offLineTask(job?.instance?.taskId, key);
res.then((result) => {
if (result.code == CODE.SUCCESS) {
message.success(key+"成功");
message.success(key + "成功");
handleGetJobInfoDetail();
} else {
message.error(key+"失败");
message.error(key + "失败");
}
});
}
......@@ -110,7 +110,7 @@ const JobInfo = (props: any) => {
cancelText: '取消',
onOk: async () => {
if (!job?.cluster?.id) return;
const res = restartJob(job?.instance?.taskId);
const res = restartJob(job?.instance?.taskId, job?.instance?.step == JOB_LIFE_CYCLE.ONLINE);
res.then((result) => {
if (result.code == CODE.SUCCESS) {
message.success("重新上线成功");
......@@ -126,16 +126,19 @@ const JobInfo = (props: any) => {
let buttons = [
<Button key="back" type="dashed" onClick={handleBack}>返回</Button>,
];
if(!isStatusDone(job?.instance?.status as string)){
buttons.push(<Button key="refresh" icon={<RedoOutlined/>} onClick={handleRefreshJobInfoDetail}/>);
if (!isStatusDone(job?.instance?.status as string)) {
buttons.push(<Button key="flinkwebui">
<Link href={`http://${job?.history?.jobManagerAddress}/#/job/${job?.instance?.jid}/overview`} target="_blank">
FlinkWebUI
</Link></Button>);
}
buttons.push(<Button key="autorestart" type="primary" onClick={handleRestart}>重新{job?.instance?.step == 5?'上线':'启动'}</Button>);
if(!isStatusDone(job?.instance?.status as string)){
buttons.push(<Button key="autostop" type="primary" danger onClick={()=>{handleSavepoint('cancel')}}>{job?.instance?.step == 5?'下线':'智能停止'}</Button>);
buttons.push(<Button key="autorestart" type="primary"
onClick={handleRestart}>重新{job?.instance?.step == 5 ? '上线' : '启动'}</Button>);
if (!isStatusDone(job?.instance?.status as string)) {
buttons.push(<Button key="autostop" type="primary" danger onClick={() => {
handleSavepoint('cancel')
}}>{job?.instance?.step == 5 ? '下线' : '智能停止'}</Button>);
buttons.push(<Dropdown
key="dropdown"
trigger={['click']}
......@@ -256,16 +259,16 @@ const JobInfo = (props: any) => {
<ProCard>
{tabKey === 'base' ? <BaseInfo job={job}/> : undefined}
{tabKey === 'config' ? <Config job={job}/> : undefined}
{tabKey === 'cluster' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE} /> : undefined}
{tabKey === 'snapshot' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE} /> : undefined}
{tabKey === 'cluster' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE}/> : undefined}
{tabKey === 'snapshot' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE}/> : undefined}
{tabKey === 'exception' ? <Exception job={job}/> : undefined}
{tabKey === 'log' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE} /> : undefined}
{tabKey === 'optimize' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE} /> : undefined}
{tabKey === 'log' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE}/> : undefined}
{tabKey === 'optimize' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE}/> : undefined}
{tabKey === 'flinksql' ? <FlinkSQL job={job}/> : undefined}
{tabKey === 'datamap' ? <DataMap job={job} /> : undefined}
{tabKey === 'olap' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE} /> : undefined}
{tabKey === 'version' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE} /> : undefined}
{tabKey === 'alert' ? <Alert job={job} /> : undefined}
{tabKey === 'datamap' ? <DataMap job={job}/> : undefined}
{tabKey === 'olap' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE}/> : undefined}
{tabKey === 'version' ? <Empty image={Empty.PRESENTED_IMAGE_SIMPLE}/> : undefined}
{tabKey === 'alert' ? <Alert job={job}/> : undefined}
</ProCard>
</PageContainer>
);
......
......@@ -773,6 +773,12 @@ export default (): React.ReactNode => {
<li>
<Link>新增 Hive 数据源注册、元数据、查询和执行</Link>
</li>
<li>
<Link>新增 作业剪切和粘贴</Link>
</li>
<li>
<Link>新增 实时任务监控容错机制</Link>
</li>
</ul>
</Paragraph>
</Timeline.Item>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment