[feat] Project initialization

This commit is contained in:
Kris 2025-03-28 17:52:09 +08:00
commit 158dea0bac
810 changed files with 53082 additions and 0 deletions

148
.gitignore vendored Normal file
View File

@ -0,0 +1,148 @@
.settings/
.project
.classpath
target/
node_modules/
# Created by .ignore support plugin (hsz.mobi)
### Java template
# Compiled class file
*.class
# Log file
#*.log
# BlueJ files
*.ctxt
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
#*.jar
*.war
*.nar
*.ear
*.zip
*.tar.gz
*.rar
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
### Maven template
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
.mvn/wrapper/maven-wrapper.jar
### Scala template
*.class
#*.log
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
*.iml
.idea
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
./logs

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2020 zhuhuipei
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

278
README.md Normal file
View File

@ -0,0 +1,278 @@
## This project won third prize at the [Flink Forward Asia Hackathon (2021)](https://mp.weixin.qq.com/s/J2OkX_D34ktfwSfhwdmXYQ)
**Mirror of this article for readers in mainland China (images on GitHub may not load there, for reasons you can guess):**
https://xie.infoq.cn/article/1af0cb75be056fea788e6c86b
**GitHub:** https://github.com/zhp8341/flink-streaming-platform-web
**Gitee mirror (China):** https://gitee.com/zhuhuipei/flink-streaming-platform-web
### For paid technical services, [see here](/docs/service.md)
## 1. Introduction
flink-streaming-platform-web is a visual, lightweight Flink web client built on top of [Apache Flink](https://flink.apache.org): users only need to configure SQL in the web UI to run streaming jobs.
**Main features:** job configuration, start/stop, alerting, and logs, with SQL auto-completion, formatting, and statement validation.
**Goal:** reduce development effort and cost by expressing streaming jobs entirely in SQL. 😂
This project won third prize at the [Flink Forward Asia Hackathon (2021)](https://mp.weixin.qq.com/s/J2OkX_D34ktfwSfhwdmXYQ).
[Screenshots of the web UI](/docs/img.md)
**For technical support, [see here](/docs/service.md).**
### 1. Main features
* **[1] Jobs can be single-stream, dual-stream, or a stream joined with a dimension table.**
* **[2] Supports LOCAL, yarn-per-job, STANDALONE, and Application deployment modes.**
* **[3] Supports catalogs and Hive.**
* **[4] Supports custom UDFs and connectors, fully compatible with the official connectors.**
* **[5] Online SQL development with syntax hints and formatting.**
* **[6] DingTalk alerts, custom callback alerts, and automatic job restart.**
* **[7] Supports submitting jobs from custom JARs.**
* **[8] Supports multiple Flink versions (you need to build against the matching Flink version).**
* **[9] Automatic and manual savepoint backups, and recovery of a job from a savepoint.**
* **[10] Supports batch jobs, e.g. Hive.**
* **[11] Management of third-party JARs such as connectors and UDFs.**
**The bundled Flink version has been upgraded to 1.16.7.**
**If you find the project useful, please give it a star in the top-right corner. Thank you 🙏 — your support is what keeps this open-source project going.**
### 2. Screenshots and source documentation
1. [Screenshots of the web UI](/docs/img.md)
2. [Source code walkthrough](/docs/source.md)
**For technical support, [see here](/docs/service.md).**
## 2. Environment setup and installation
1. [Installing Flink and flink-streaming-platform-web](/docs/deploy.md)
2. [Running and debugging in IDEA](/docs/idea-run.md)
3. [Building a Docker image (reference)](/docs/tristan-deploy-use)
4. [Trying flink-streaming-platform-web with Docker (for a quick evaluation)](/docs/docker-demo.md)
5. Kubernetes should work in principle, but you need to build a custom image for your Flink and/or Hadoop version.
**For technical support, [see here](/docs/service.md).**
## 3. Features
### 3.1 Configuration
1. [SQL job configuration manual](/docs/manual-sql.md)
2. [JAR job configuration manual](/docs/manual-jar.md)
### 3.2 SQL demos
1. [demo1: a single Kafka stream written to MySQL](/docs/sql_demo/demo_1.md)
2. [demo2: two Kafka streams joined and written to MySQL](/docs/sql_demo/demo_2.md)
3. [demo3: a Kafka stream joined with a MySQL dimension table in real time and written to MySQL](/docs/sql_demo/demo_3.md)
4. [demo4: tumbling window](/docs/sql_demo/demo_4.md)
5. [demo5: sliding window](/docs/sql_demo/demo_5.md)
6. [demo6: JDBC CDC example](/docs/sql_demo/demo_6.md)
7. [demo7: datagen introduction](/docs/sql_demo/demo_datagen.md)
8. [Catalog example](/docs/catalog.md)
9. [Hive batch job example](/docs/sql_demo/demo_batch.md)
### 3.3 hello-world demo
**Use the SQL below to smoke-test your environment: a hello-world job that gives new users a feel for the platform.**
```sql
CREATE TABLE source_table (
f0 INT,
f1 INT,
f2 STRING
) WITH (
'connector' = 'datagen',
'rows-per-second'='5'
);
CREATE TABLE print_table (
f0 INT,
f1 INT,
f2 STRING
) WITH (
'connector' = 'print'
);
insert into print_table select f0,f1,f2 from source_table;
```
**Official connector downloads**
See https://nightlies.apache.org/flink/flink-docs-release-1.14/zh/docs/connectors/table/overview/
## 4. Supported Flink SQL syntax
| Supported SQL statements |
| -------- |
| INSERT INTO |
| INSERT OVERWRITE |
| CREATE TABLE |
| CREATE FUNCTION |
| CREATE VIEW |
| USE CATALOG |
| DROP |
| ALTER |
| SHOW CATALOGS |
| SHOW DATABASES |
| SHOW TABLES |
| SHOW FUNCTIONS |
| CREATE CATALOG |
| SET |
| SELECT (not supported) |
## 5. Miscellaneous
1. Hadoop cluster environments differ, so deployment can be tricky and the full setup takes some time.
2. Because versions of Elasticsearch, HBase, and similar systems differ, you may need to download the source and rebuild against the matching versions. Source: [https://github.com/zhp8341/flink-streaming-platform-web](https://github.com/zhp8341/flink-streaming-platform-web)
Questions and discussion
Connector configuration follows the 1.14.3 connectors exactly; for details see
https://nightlies.apache.org/flink/flink-docs-release-1.13/zh/docs/connectors/table/kafka/
If you need a connector, download it from the official site, e.g. the
Kafka connector: https://nightlies.apache.org/flink/flink-docs-release-1.13/zh/docs/connectors/table/kafka/
**Option 1: download the connector and drop it into the flink/lib/ directory.**
1. This approach risks JAR conflicts, especially once many connectors accumulate.
2. In non-YARN modes, every newly added JAR requires restarting the Flink cluster.
**Option 2: host the JARs on an HTTP server and enter them as third-party JAR URLs.**
Inside a company it is best to host them on an internal HTTP service, e.g.:
http://ccblog.cn/jars/flink-connector-jdbc_2.11-1.12.0.jar
http://ccblog.cn/jars/flink-sql-connector-kafka_2.11-1.12.0.jar
http://ccblog.cn/jars/flink-streaming-udf.jar
http://ccblog.cn/jars/mysql-connector-java-5.1.25.jar
If you use the built-in JAR management feature, you can simply enter the JAR file names:
flink-connector-jdbc_2.11-1.12.0.jar
flink-streaming-udf.jar
mysql-connector-java-5.1.25.jar
![screenshot](http://img.ccblog.cn/flink/9.png)
Separate multiple URLs with line breaks.
**Custom connectors must be packaged as shaded JARs with dependency conflicts resolved.**
**Option 2 is recommended: each job keeps its JARs independent. Putting all connectors into lib may cause conflicts with other jobs' JARs.**
**Truly shared JARs (e.g. the MySQL driver or the Kafka connector) can go into flink/lib.**
## 6. Troubleshooting
1. [Common problems and solutions](/docs/question.md)
## 7. Roadmap
1. Batch jobs will be integrated with a scheduling system.
## 8. Building from source
[Rebuild the web client against your own Flink version](/docs/compile.md)
## 9. Contact
DingTalk group: 34315096
DingTalk
![DingTalk group](./docs/img/dd.png)
## 10. Adoption
[Survey conducted on 2021-03-18 and 2021-03-19](/docs/img2.md)
## 11. Acknowledgements
Thanks to Wen Xiangjiong (Hunan Caohua Interactive Technology Co., Ltd.) for contributing the front-end code: https://gitee.com/wenxiangjiong/flink-streaming-platform-web.git
## 12. Technical support
### For paid technical services, [see here](/docs/service.md)
## 13. Donations
![Alipay](./docs/img/zfb.png)
![WeChat Pay](./docs/img/wx.png)

108
checkstyle.xml Normal file
View File

@ -0,0 +1,108 @@
<?xml version="1.0"?>
<!DOCTYPE module PUBLIC
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<module name="Checker">
<module name="SuppressionCommentFilter">
</module>
<!-- <module name="NewlineAtEndOfFile"/>-->
<module name="FileLength">
<property name="max" value="1500"/>
</module>
<module name="TreeWalker">
<module name="FileContentsHolder"/>
<module name="IllegalImport"/>
<module name="RedundantImport"/>
<module name="UnusedImports" />
<module name="LocalFinalVariableName" />
<module name="LocalVariableName" />
<module name="PackageName">
<property name="format" value="^[a-z]+(\.[a-z][a-z0-9]*)*$" />
<message key="name.invalidPattern" value="PackageName ''{0}'' should fit ''{1}''format."/>
</module>
<module name="StaticVariableName" />
<module name="TypeName">
<property name="severity" value="warning"/>
<message key="name.invalidPattern" value="name ''{0}'' should fit ''{1}''format."/>
</module>
<module name="MemberName" />
<module name="MethodName" />
<module name="ParameterName " />
<module name="ConstantName" />
<!-- <module name="ArrayTypeStyle"/>-->
<module name="UpperEll"/>
<module name="LineLength">
<property name="max" value="140" />
</module>
<module name="MethodLength">
<property name="tokens" value="METHOD_DEF" />
<property name="max" value="90" />
</module>
<module name="ParameterNumber">
<property name="max" value="5" />
<property name="ignoreOverriddenMethods" value="true"/>
<property name="tokens" value="METHOD_DEF" />
</module>
<module name="MethodParamPad" />
<module name="TypecastParenPad" />
<module name="NoWhitespaceAfter"/>
<module name="NoWhitespaceBefore"/>
<module name="OperatorWrap"/>
<module name="ParenPad"/>
<module name="WhitespaceAfter"/>
<module name="WhitespaceAround"/>
<module name="ModifierOrder"/>
<module name="RedundantModifier"/>
<module name="AvoidNestedBlocks"/>
<module name="EmptyBlock"/>
<module name="LeftCurly"/>
<module name="NeedBraces"/>
<module name="RightCurly"/>
<module name="EmptyStatement"/>
<module name="EqualsHashCode"/>
<module name="HiddenField">
<property name="tokens" value="VARIABLE_DEF"/>
</module>
<module name="InnerAssignment"/>
<module name="MissingSwitchDefault"/>
<module name="SimplifyBooleanExpression"/>
<module name="SimplifyBooleanReturn"/>
<module name="FinalClass"/>
<module name="InterfaceIsType"/>
<module name="VisibilityModifier">
<property name="packageAllowed" value="true"/>
<property name="protectedAllowed" value="true"/>
</module>
<module name="StringLiteralEquality"/>
<module name="NestedForDepth">
<property name="max" value="2"/>
</module>
<module name="NestedIfDepth">
<property name="max" value="3"/>
</module>
<module name="ReturnCount">
<property name="max" value="6"/>
</module>
<module name="NestedTryDepth ">
<property name="max" value="4"/>
</module>
<!-- <module name="SuperClone" />-->
<module name="SuperFinalize" />
</module>
</module>

124
deployer/pom.xml Normal file
View File

@ -0,0 +1,124 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>flink-streaming-platform-web</artifactId>
<groupId>com.streaming.platform.web</groupId>
<version>1.1</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>deployer</artifactId>
<version>${flink_streaming_version}</version>
<dependencies>
<dependency>
<groupId>com.streaming.platform.web</groupId>
<artifactId>flink-streaming-core</artifactId>
<version>${flink_streaming_version}</version>
</dependency>
<dependency>
<groupId>com.streaming.platform.web</groupId>
<artifactId>flink-streaming-web</artifactId>
<version>${flink_streaming_version}</version>
</dependency>
<dependency>
<groupId>io.prometheus.jmx</groupId>
<artifactId>jmx_prometheus_javaagent</artifactId>
<version>0.16.1</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<version>3.0.5</version>
<configuration>
<!-- Analysis effort level: Min, Default, or Max -->
<effort>Low</effort>
<!-- Threshold: Low, Medium, or High (Low is the strictest; High reports only severe bugs). Medium is recommended. -->
<threshold>High</threshold>
<failOnError>true</failOnError>
<includeTests>true</includeTests>
<skip>true</skip>
</configuration>
<executions>
<execution>
<id>run-findbugs</id>
<!-- Run the FindBugs check in the package phase (could also be compile), e.g. when running mvn clean package -->
<phase>package</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- The deployer module is packaged as a jar; since it has no Java sources or resources, this configuration lets the build succeed anyway -->
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<addMavenDescriptor>true</addMavenDescriptor>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<!-- Pinned assembly plugin version -->
<version>3.1.1</version>
<executions>
<execution>
<id>assemble</id>
<goals>
<goal>single</goal>
</goals>
<phase>package</phase>
</execution>
</executions>
<configuration>
<finalName>projectname</finalName>
<appendAssemblyId>false</appendAssemblyId>
<attach>false</attach>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>dev</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<!-- The assembly plugin needs a descriptor that defines the package layout and where the packaged files come from -->
<descriptors>
<descriptor>${basedir}/src/main/assembly/dev.xml</descriptor>
</descriptors>
<finalName>flink-streaming-platform-web</finalName>
<outputDirectory>${project.build.directory}</outputDirectory>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@ -0,0 +1,131 @@
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id>dist</id>
<formats>
<format>tar.gz</format>
</formats>
<includeBaseDirectory>true</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>.</directory>
<outputDirectory>/</outputDirectory>
<includes>
<include>README*</include>
</includes>
</fileSet>
<fileSet>
<directory>./src/main/bin</directory>
<outputDirectory>bin</outputDirectory>
<includes>
<include>**/*</include>
</includes>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>./src/main/conf</directory>
<outputDirectory>/conf</outputDirectory>
<includes>
<include>**/*</include>
</includes>
</fileSet>
<fileSet>
<directory>./src/main/resources</directory>
<outputDirectory>/conf</outputDirectory>
<includes>
<include>**/*</include>
</includes>
</fileSet>
<fileSet>
<directory>target</directory>
<outputDirectory>logs</outputDirectory>
<excludes>
<exclude>**/*</exclude>
</excludes>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>lib</outputDirectory>
<scope>runtime</scope>
<excludes>
<exclude>io.*:*</exclude>
<exclude>org.*:*</exclude>
<exclude>net.*:*</exclude>
<exclude>ch.*:*</exclude>
<exclude>jakarta.*:*</exclude>
<exclude>com.streaming.platform.web:flink-streaming-commom</exclude>
<exclude>com.streaming.platform.web:flink-streaming-validation</exclude>
<exclude>com.alibaba.*:*</exclude>
<exclude>com.esotericsoftware.*:*</exclude>
<exclude>com.alibaba:*</exclude>
<exclude>com.github.*:*</exclude>
<exclude>com.typesafe.*:*</exclude>
<exclude>com.typesafe:*</exclude>
<exclude>com.twitter.*:*</exclude>
<exclude>com.twitter:*</exclude>
<exclude>com.jayway.*:*</exclude>
<exclude>com.github.*:*</exclude>
<exclude>com.fasterxml.*:*</exclude>
<exclude>com.google.*:*</exclude>
<exclude>com.google:*</exclude>
<exclude>com.fasterxml.*:*</exclude>
<exclude>com.fasterxml:*</exclude>
<exclude>com.zaxxer:*</exclude>
<exclude>com.zaxxer.*:*</exclude>
<exclude>com.tdunning:*</exclude>
<exclude>com.tdunning.*:*</exclude>
<exclude>com.carrotsearch:*</exclude>
<exclude>com.carrotsearch.*:*</exclude>
<exclude>com.yammer.*:*</exclude>
<exclude>com.thoughtworks.*:*</exclude>
<exclude>com.mchange:*</exclude>
<exclude>org.apache.*:*</exclude>
<exclude>org.apache:*</exclude>
<exclude>commons-collections:*</exclude>
<exclude>cn.hutool:*</exclude>
<exclude>commons-compiler:*</exclude>
<exclude>commons-codec:*</exclude>
<exclude>commons-io:*</exclude>
<exclude>jakarta.annotation:*</exclude>
<exclude>commons-cli:*</exclude>
<exclude>joda-time:*</exclude>
<exclude>mysql:*</exclude>
<exclude>commons-lang:*</exclude>
<exclude>commons-logging:*</exclude>
<exclude>io.netty:*</exclude>
<exclude>log4j:*</exclude>
<exclude>commons-httpclient:*</exclude>
<exclude>com.yammer.*:*</exclude>
<exclude>com.lmax:*</exclude>
<exclude>junit:junit:*</exclude>
<exclude>io.dropwizard.*:*</exclude>
<exclude>com.jamesmurty.*:*</exclude>
<exclude>commons-digester:commons-digester:*</exclude>
<exclude>javax.inject:javax.inject:*</exclude>
<exclude>aopalliance:aopalliance:*</exclude>
<exclude>xmlenc:xmlenc:*</exclude>
<exclude>com.jcraft:jsch:*</exclude>
<exclude>com.sun.jersey:jersey-client:*</exclude>
<exclude>commons-beanutils:commons-beanutils:*</exclude>
<exclude>commons-beanutils:commons-beanutils:*</exclude>
<exclude>commons-*:*:*</exclude>
<exclude>com.sun.*:*:*</exclude>
<exclude>javax.*:*:*</exclude>
<exclude>com.streaming.platform.web:flink-streaming-commom</exclude>
<exclude>com.streaming.platform.web:flink-streaming-web-config</exclude>
<exclude>com.streaming.platform.web:flink-streaming-web-alarm</exclude>
<exclude>com.streaming.platform.web:flink-streaming-web-common</exclude>
</excludes>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,96 @@
#! /bin/bash
# Which action to run (start|stop|restart)
ACTION=$1
echo "Running deploy script with action: $1"
##source /etc/profile
set -e
# Find the java binary
if [ -n "${JAVA_HOME}" ]; then
JAVA_RUN="${JAVA_HOME}/bin/java"
else
if [ `command -v java` ]; then
JAVA_RUN="java"
else
echo "JAVA_HOME is not set" >&2
exit 1
fi
fi
curr_path=`pwd`
shell_path=$(cd $(dirname $0); pwd)
cd ${shell_path}
echo "JAVA_HOME= ${JAVA_HOME}"
## Variables ##
env=prod
project=$(find "../lib" -regex ".*flink-streaming-web.*.jar")
echo "project= $project"
time=$(date "+%Y%m%d-%H%M%S")
## JAVA_OPTS ##
JAVA_OPTS=" -javaagent:./jmx_prometheus_javaagent-0.17.1.jar=12345:./jmx-config.yaml -Xmx1888M -Xms1888M -Xmn1536M -XX:MaxMetaspaceSize=512M -XX:MetaspaceSize=512M -XX:+UseConcMarkSweepGC -Xdebug -Xrunjdwp:transport=dt_socket,address=9901,server=y,suspend=n -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -Dcom.sun.management.jmxremote.port=8999 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -XX:+ExplicitGCInvokesConcurrentAndUnloadsClasses -XX:+CMSClassUnloadingEnabled -XX:+ParallelRefProcEnabled -XX:+CMSScavengeBeforeRemark -XX:ErrorFile=../logs/hs_err_pid%p.log -XX:HeapDumpPath=../logs -XX:+HeapDumpOnOutOfMemoryError"
start(){
echo "开始启动服务 app_name=$project "
pid=$(ps x | grep $project | grep -v grep | awk '{print $1}')
echo $pid
if [ -z $pid ]
then
echo "开始启动进程执行命令 java $JAVA_OPTS -jar $project --spring.profiles.active=$env --spring.config.additional-location=../conf/application.properties "
java $JAVA_OPTS -jar $project --spring.profiles.active=$env --spring.config.additional-location=../conf/application.properties >/dev/null 2>&1 &
sleep 5
pid=$(ps x | grep $project | grep -v grep | awk '{print $1}')
if [ -z $pid ]
then
echo "启动应用进程失败 请手动执行一下 java -jar $project --spring.profiles.active=$env --spring.config.additional-location=../conf/application.properties "
else
echo "启动成功 pid=" $pid
fi
echo "可通过命令 tail -fn 300 ../logs/info.log 查看web日志"
else
echo " $project 进程已经存 pid=" $pid
fi
}
stop()
{
pid=$(ps x | grep $project | grep -v grep | awk '{print $1}')
echo "进程 $pid"
echo "------>Check pid of $project"
if [ -z "$pid" ]
then
echo "------>APP_NAME process [$project] is already stopped"
else
for pid in ${pid[*]}
do
echo "------>Kill process which pid=$pid"
/bin/kill $pid
done
sleep 5
fi
}
restart()
{
stop;
start;
}
case "$ACTION" in
restart)
cp $project $project$time
restart
;;
start)
start
;;
stop)
stop
;;
esac
cd ${curr_path}

View File

@ -0,0 +1,87 @@
#!/usr/bin/env bash
# Which action to run (start|stop|restart)
ACTION=$1
## Variables ##
env=docker
project="/data/projects/flink-streaming-platform-web/lib/flink-streaming-web-1.5.0.RELEASE.jar"
## JAVA_OPTS ##
JAVA_OPTS=" -Duser.timezone=GMT+8 -Xmx1888M -Xms1888M -Xmn1536M -XX:MaxMetaspaceSize=512M -XX:MetaspaceSize=512M -XX:+UseConcMarkSweepGC -Xdebug -Xrunjdwp:transport=dt_socket,address=9901,server=y,suspend=n -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -Dcom.sun.management.jmxremote.port=8999 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -XX:+ExplicitGCInvokesConcurrentAndUnloadsClasses -XX:+CMSClassUnloadingEnabled -XX:+ParallelRefProcEnabled -XX:+CMSScavengeBeforeRemark -XX:ErrorFile=/data/projects/flink-streaming-platform-web//logs_jvm/hs_err_pid%p.log -XX:HeapDumpPath=/data/projects/flink-streaming-platform-web//logs_jvm/ -XX:+HeapDumpOnOutOfMemoryError"
start(){
echo "开始启动服务 app_name=$project "
pid=$(ps x | grep $project | grep -v grep | awk '{print $1}')
echo $pid
if [ -z $pid ]
then
echo "开始启动进程执行命令 java $JAVA_OPTS -jar $project --spring.profiles.active=$env"
java $JAVA_OPTS -jar $project --spring.profiles.active=$env --spring.config.additional-location=/data/projects/flink-streaming-platform-web/conf/application.properties >/dev/null 2>&1 &
sleep 20
pid=$(ps x | grep $project | grep -v grep | awk '{print $1}')
if [ -z $pid ]
then
echo "启动应用进程失败 请手动执行一下 java -jar $project --spring.profiles.active=$env"
else
echo "启动成功 pid=" $pid
fi
echo "可通过命令 tail -fn 300 /data/projects/flink-streaming-platform-web/logs/info.log 查看web日志"
else
echo " $project 进程已经存 pid=" $pid
fi
}
stop()
{
pid=$(ps x | grep $project | grep -v grep | awk '{print $1}')
echo "进程 $pid"
echo "------>Check pid of $project"
if [ -z "$pid" ]
then
echo "------>APP_NAME process [$project] is already stopped"
else
for pid in ${pid[*]}
do
echo "------>Kill process which pid=$pid"
/bin/kill $pid
done
sleep 30
fi
}
restart()
{
stop;
start;
}
case "$ACTION" in
restart)
time=$(date "+%Y%m%d-%H%M%S")  # timestamp suffix used when backing up the current jar
cp $project $project$time
restart
;;
start)
start
;;
stop)
stop
;;
esac

View File

@ -0,0 +1,13 @@
lowercaseOutputLabelNames: true
lowercaseOutputName: true
whitelistObjectNames: ["java.lang:type=OperatingSystem"]
blacklistObjectNames: []
rules:
- pattern: 'java.lang<type=OperatingSystem><>(committed_virtual_memory|free_physical_memory|free_swap_space|total_physical_memory|total_swap_space)_size:'
name: os_$1_bytes
type: GAUGE
attrNameSnakeCase: true
- pattern: 'java.lang<type=OperatingSystem><>((?!process_cpu_time)\w+):'
name: os_$1
type: GAUGE
attrNameSnakeCase: true

View File

@ -0,0 +1,4 @@
# Docker-only settings; the Docker network/host mapping must be configured accordingly
spring.datasource.url=jdbc:mysql://10.0.0.2:3306/flink_web_docker?serverTimezone=UTC&useUnicode=true&characterEncoding=utf-8&useSSL=false
spring.datasource.username=root
spring.datasource.password=root

View File

@ -0,0 +1,20 @@
# If your company runs Nacos, some of these settings can be moved there; otherwise leave the Nacos settings commented out
#nacos.core.auth.enabled=true
#nacos.config.bootstrap.enable=true
#nacos.config.server-addr=172.16.167.37:8848
#nacos.config.group=DEFAULT_GROUP
#nacos.config.data-id=flink-web.properties
#nacos.config.namespace=dev
#nacos.config.username=nacos
#nacos.config.password=nacos
#nacos.config.type=properties
#nacos.config.max-retry=10
#nacos.config.auto-refresh=true
#nacos.config.config-retry-time=2333
#nacos.config.config-long-poll-timeout=46000
#### JDBC settings
server.port=9084
spring.datasource.url=jdbc:mysql://localhost:3306/flink_web?serverTimezone=UTC&useUnicode=true&characterEncoding=utf-8&useSSL=false
spring.datasource.username=root
spring.datasource.password=root

44
docker/Dockerfile Normal file
View File

@ -0,0 +1,44 @@
FROM centos:7
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
RUN echo 'Asia/Shanghai' >/etc/timezone
RUN yum -y install kde-l10n-Chinese && yum -y reinstall glibc-common
RUN localedef -c -f UTF-8 -i zh_CN zh_CN.utf8
ENV LC_ALL zh_CN.utf8
RUN yum install java-1.8.0-openjdk* -y
RUN mkdir /data/
RUN mkdir /data/projects
WORKDIR /data/projects/
ADD flink-streaming-platform-web.tar.gz /data/projects/
ADD flink-1.13.2.tar /data/projects/
COPY app-start.sh /data/projects/
COPY application-docker.properties /data/projects/flink-streaming-platform-web/conf/
COPY info.log /data/projects/flink-streaming-platform-web/logs/
RUN cd /data/projects/
RUN chown -R root:root flink-streaming-platform-web
RUN chown -R root:root flink-1.13.2
ENTRYPOINT bash app-start.sh
EXPOSE 9084 5007 8081

24
docker/app-start.sh Normal file
View File

@ -0,0 +1,24 @@
cd /data/projects/flink-1.13.2
./bin/start-cluster.sh
env=docker
project="/data/projects/flink-streaming-platform-web/lib/flink-streaming-web-1.5.0.RELEASE.jar"
JAVA_OPTS="-Duser.timezone=GMT+8 -Xmx1888M -Xms1888M -Xmn1536M -XX:MaxMetaspaceSize=512M -XX:MetaspaceSize=512M -XX:+UseConcMarkSweepGC -Xdebug -Xrunjdwp:transport=dt_socket,address=9901,server=y,suspend=n -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -Dcom.sun.management.jmxremote.port=8999 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -XX:+ExplicitGCInvokesConcurrentAndUnloadsClasses -XX:+CMSClassUnloadingEnabled -XX:+ParallelRefProcEnabled -XX:+CMSScavengeBeforeRemark -XX:ErrorFile=/data/projects/flink-streaming-platform-web/logs_jvm/hs_err_pid%p.log -XX:HeapDumpPath=/data/projects/flink-streaming-platform-web/logs_jvm/ -XX:+HeapDumpOnOutOfMemoryError"
echo "start "
java $JAVA_OPTS -jar $project --spring.profiles.active=$env --spring.config.additional-location=/data/projects/flink-streaming-platform-web/conf/application-docker.properties >/dev/null 2>&1 &
tail -fn 300 /data/projects/flink-streaming-platform-web/logs/info.log

View File

@ -0,0 +1,4 @@
# Docker-only settings; the Docker network/host mapping must be configured accordingly
spring.datasource.url=jdbc:mysql://mysql-web:3306/flink_web_docker?serverTimezone=UTC&useUnicode=true&characterEncoding=utf-8&useSSL=false
spring.datasource.username=root
spring.datasource.password=root

1
docker/info.log Normal file
View File

@ -0,0 +1 @@
...............start............

1
docker/read.md Normal file
View File

@ -0,0 +1 @@
docker build --no-cache -t registry.cn-hangzhou.aliyuncs.com/flink-streaming-platform-web/flink-web:flink-1.13.2-20220327 ./

154
docs/catalog.md Normal file
View File

@ -0,0 +1,154 @@
**Catalog usage notes**
## Dependency JARs: see the official documentation
https://ci.apache.org/projects/flink/flink-docs-release-1.12/zh/dev/table/connectors/hive/
Different Hive versions require different JARs.
Official documentation on catalog connection configuration:
https://ci.apache.org/projects/flink/flink-docs-release-1.12/zh/dev/table/connectors/hive/#%E8%BF%9E%E6%8E%A5%E5%88%B0hive
`The JARs can be placed under lib, or hosted on an HTTP server and added as third-party URLs when used.`
## demo1
~~~~ sql
CREATE CATALOG testmyhive WITH (
'type' = 'hive',
'default-database' = 'zhp',
'hive-conf-dir' = '/Users/huipeizhu/hive-conf'
);
USE CATALOG testmyhive;
CREATE TABLE source_table_01 (
f0 INT,
f1 INT,
f2 STRING
) WITH (
'connector' = 'datagen',
'rows-per-second'='5'
);
CREATE TABLE print_table_01 (
f0 INT,
f1 INT,
f2 STRING
) WITH (
'connector' = 'print'
);
insert into print_table_01 select f0,f1,f2 from source_table_01;
SHOW TABLES;
SHOW FUNCTIONS;
SHOW CATALOGS;
SHOW DATABASES;
~~~~
## demo2
If the catalog has been created before, you can use it directly:
~~~~ sql
CREATE CATALOG testmyhive WITH (
'type' = 'hive',
'default-database' = 'zhp',
'hive-conf-dir' = '/Users/huipeizhu/hive-conf'
);
USE CATALOG testmyhive;
insert into print_table_01 select f0,f1,f2 from source_table_01;
~~~~
## demo3
https://ci.apache.org/projects/flink/flink-docs-release-1.12/zh/dev/table/connectors/hive/hive_read_write.html
Sink streaming results into Hive.
**Note: writing to Hive requires checkpointing to be enabled.**
~~~~ sql
CREATE CATALOG testmyhive WITH (
'type' = 'hive',
'default-database' = 'zhp',
'hive-conf-dir' = '/Users/huipeizhu/hive-conf'
);
USE CATALOG testmyhive;
drop table IF EXISTS item_test;
drop table IF EXISTS hive_flink_table;
create table item_test (
itemId BIGINT,
price BIGINT,
proctime AS PROCTIME ()
)with (
'connector' = 'kafka',
'topic' = 'flink-catalog-v1',
'properties.bootstrap.servers'='127.0.0.1:9092',
'properties.group.id'='test-1',
'format'='json',
'scan.startup.mode' = 'earliest-offset'
);
SET table.sql-dialect=hive;
CREATE TABLE hive_flink_table (
itemId BIGINT,
price BIGINT,
ups string
) TBLPROPERTIES (
'sink.rolling-policy.rollover-interval'='1min',
'sink.partition-commit.trigger'='process-time',
'sink.partition-commit.policy.kind'='metastore,success-file'
);
SET table.sql-dialect=default;
insert into hive_flink_table select itemId,price, 'XXXXaaa' as ups from item_test;
~~~~
## Kafka producer demo data (for testing)
~~~java
// Imports added for completeness; fastjson is assumed for JSON serialization.
import com.alibaba.fastjson.JSON;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
public class KafkaSend {
public static void main(String[] args) throws Exception {
Properties props = new Properties();
props.put("bootstrap.servers", "127.0.0.1:9092");
props.put("acks", "all");
props.put("retries", 0);
props.put("batch.size", 16384);
props.put("linger.ms", 1);
props.put("buffer.memory", 33554432);
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
Producer<String, String> producer = new KafkaProducer<String, String>(props);
Map<String, Long> map = new HashMap<>();
for (long i = 0; i <10000 ; i++) {
map.put("itemId", i);
map.put("price", i+1);
producer.send(new ProducerRecord<String, String>("flink-catalog-v1", null, JSON.toJSONString(map)));
producer.flush();
Thread.sleep(1000L);
}
producer.close();
}
}
~~~

17
docs/compile.md Normal file
View File

@ -0,0 +1,17 @@
The web client currently targets Flink 1.16.2. If you need a different Flink version, download the source
and change the version numbers in the pom: https://github.com/zhp8341/flink-streaming-platform-web/blob/master/pom.xml
~~~~
<flink.version>1.12.0</flink.version> <!-- Flink version -->
<scala.binary.version>2.11</scala.binary.version> <!-- Scala version -->
~~~~
Changing the versions may make the referenced Flink dependencies incompatible with each other; you will need to resolve that manually.
After saving, build the package:
~~~~
mvn clean package -Dmaven.test.skip=true
~~~~
The resulting package is produced in {your directory}/flink-streaming-platform-web/deployer/target
and is named flink-streaming-platform-web.tar.gz.

179
docs/deploy.md Normal file
View File

@ -0,0 +1,179 @@
### 1. Environment
Operating system: Linux **(Windows is not supported for now)**
Hadoop 2+
**Flink 1.12.0** — official docs: https://ci.apache.org/projects/flink/flink-docs-release-1.12/
JDK 1.8
Scala 2.12
Kafka 1.0+
MySQL 5.6+
**If you need a different Flink version, you can build it yourself; see the source build section below.**
### 2. Installation
#### 2.1 Installing the Flink client
Download the matching release from
https://www.apache.org/dyn/closer.lua/flink/flink-1.16.2/flink-1.16.2-bin-scala_2.12.tgz and unpack it.
a: /flink-1.16.2/conf
**1. YARN_PER mode**
Put the Hadoop client configuration files into this directory
(and configure the Hadoop client environment, i.e. the HADOOP_CLASSPATH environment variable):
~~~~
core-site.xml
yarn-site.xml
hdfs-site.xml
~~~~
**2. LOCAL mode**
**3. STANDALONE mode**
**4. yarn-Application mode**
All of these modes require editing **flink-conf.yaml** to set the class-loading order:
**classloader.resolve-order: parent-first**
b: /flink-1.12.0/lib — Hadoop integration
~~~~
Download flink-shaded-hadoop-2-uber-${xxx}.jar into lib
from https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.7.5-10.0/flink-shaded-hadoop-2-uber-2.7.5-10.0.jar
~~~~
**Afterwards, run export HADOOP_CLASSPATH:**
~~~~
export HADOOP_CLASSPATH=`hadoop classpath`
~~~~
#### 2.2 Installing flink-streaming-platform-web
##### a: **Download the latest release** from https://github.com/zhp8341/flink-streaming-platform-web/releases/ and unpack it:
~~~~
tar -xvf flink-streaming-platform-web.tar.gz
~~~~
##### b: Run the MySQL scripts
~~~~
MySQL 5.6 or later.
Create a database named flink_web,
then run the table-creation script:
https://github.com/zhp8341/flink-streaming-platform-web/blob/master/docs/sql/flink_web.sql
~~~~
##### c: Update the database connection settings
~~~~
/flink-streaming-platform-web/conf/application.properties
Point it at the MySQL database created above.
~~~~
**Check whether your MySQL supports useSSL=true; if not, simply set useSSL=false in the JDBC URL.**
##### d: Start the web application
~~~~
cd /XXXX/flink-streaming-platform-web/bin
Start: sh deploy.sh start
Stop:  sh deploy.sh stop
Log directory: /XXXX/flink-streaming-platform-web/logs/
~~~~
**You must cd into the bin directory before running deploy.sh, otherwise it will not start.**
##### e: Log in
~~~~
http://${ip or hostname}:9084/ e.g. http://hadoop003:9084/admin/index
Username: admin  Password: 123456
~~~~
##### f: Clustered deployment
For a clustered deployment, see this simple reference diagram:
![图片](https://img-blog.csdnimg.cn/20201018111339635.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3pocDgzNDE=,size_16,color_FFFFFF,t_70#pic_center)
**Note: the Flink client must be installed on the same server as the flink-streaming-platform-web application.**
##### g: Changing ports / memory
The web port is set in conf/application.properties
**(server.port, default 9084).**
The JMX port is set in the deploy.sh startup script
**(default 8999).**
**The debug port is 9901.**
JVM memory settings are also in deploy.sh
**(sized for 2 GB of physical memory by default: -Xmx1888M -Xms1888M -Xmn1536M -XX:MaxMetaspaceSize=512M -XX:MetaspaceSize=512M).**

71
docs/docker-demo.md Normal file
View File

@ -0,0 +1,71 @@
### Local Docker trial
#### Note: installing Docker itself is not covered here.
#### 1. docker-compose file
Create a docker-compose.yml file
with the following content:
~~~~
version: '3'
services:
flink-streaming-platform-web:
container_name: flink-streaming-platform-web-demo
image: registry.cn-hangzhou.aliyuncs.com/flink-streaming-platform-web/flink-web:flink-1.13.2-20220327
ports:
- 8081:8081
- 9084:9084
mysql:
restart: always
image: registry.cn-hangzhou.aliyuncs.com/flink-streaming-platform-web/mysql:mysql-5.7.16-20220327
container_name: mysql-web
ports:
- 3307:3306
environment:
- "MYSQL_ROOT_PASSWORD=root"
- "MYSQL_DATABASE=root"
- "TZ=Asia/Shanghai"
~~~~
Run the following commands in order:
~~~~
docker-compose up -d mysql
docker-compose up -d flink-streaming-platform-web
docker-compose restart flink-streaming-platform-web
~~~~
Note: start mysql first, then flink-streaming-platform-web.
**View the logs with: docker-compose logs -f flink-streaming-platform-web**
#### 2. Verify
http://127.0.0.1:9084 — username/password: admin / 123456
After logging in, submit the **test_datagen_simple** job to see it in action.
The job is also visible at http://127.0.0.1:8081/#/overview
**Note: this demo is meant to give newcomers a feel for flink-streaming-platform-web. The Docker setup only runs in local mode and cannot be used in production; for production you still need a full installation.**
#### 3. Original image build file
[Dockerfile](../docker/Dockerfile)

51
docs/idea-run.md Normal file
View File

@ -0,0 +1,51 @@
# Running directly from IDEA (no installation required)
## Why run from IDEA
You can read logs directly in the console, debug locally, get familiar with the internals quickly, and contribute more easily.
## Steps
```
1. git clone the repository and open the project in IDEA.
2. Update the database connection in flink-streaming-web/src/main/resources/application.properties.
3. Create the flink_web database locally and run docs/sql/flink_web.sql to create the tables.
4. Run mvn clean package -DskipTests.
5. Create a lib folder in the flink-streaming-platform-web root directory.
6. Move flink-streaming-core/target/flink-streaming-core.jar into the newly created lib folder.
7. Rename flink-streaming-core.jar to flink-streaming-core-1.5.0.RELEASE.jar.
8. The flink-streaming-web module is the REST service; its startup class is com.flink.streaming.web.StartApplication.
9. Once started, open localhost:8180 — username: admin, password: 123456.
10. In system settings, set flink_streaming_platform_web_home=<your path>/flink-streaming-platform-web/.
11. Start a local Flink cluster with ./start-cluster.sh and set flink_rest_http_address to http://localhost:8180/ in the system configuration.
12. Create a job, set its run mode to Local Cluster, and start happily writing SQL.
```
## How it works
```
1. The platform submits jobs by running Process pcs = Runtime.getRuntime().exec(command); (a minimal sketch of this follows the block below).
2. command looks like the following; this one was generated by the platform to submit a batch job via YARN:
/Users/gump/dreamware/flink-1.13.1/bin/flink run -yjm 1024m -ytm 1024m -p 1 -yqu default -ynm flink@my_batch_job -yd -m yarn-cluster -c com.flink.streaming.core.JobApplication /Users/gump/study/source/github/flink-streaming-platform-web/lib/flink-streaming-core-1.5.0.RELEASE.jar -sql /Users/gump/study/source/github/flink-streaming-platform-web/sql/job_sql_3.sql -type 2
3. Breaking the command down, you really just need to know where the Flink client lives; the submitted jar is the flink-streaming-core-1.5.0.RELEASE.jar placed under lib.
4. The YARN URL appears to be used only for log access.
5. com.flink.streaming.core.JobApplication is the job that actually runs; it can also be launched from IDEA once its arguments are configured, and you can extrapolate from there.
For reference, the arguments I use when debugging: -sql /Users/gump/study/source/github/flink-streaming-platform-web/sql/job_sql_1.sql -type 0
One detail: the Flink dependencies in the core pom are scoped as provided; to debug the job locally, comment that out and refresh Maven.
6. The SQL configured in a job is written to the project's /sql directory (the path after -sql in the command above); -type tells the job whether it is a streaming or a batch task.
```
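For orientation, here is a minimal, self-contained sketch (not the platform's actual implementation) of launching such a command from Java via `Runtime.getRuntime().exec`, the mechanism described in point 1. The command string, paths, and class names follow the example above but are otherwise hypothetical.

```java
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

// Minimal sketch: run a "flink run ..." command as a child process and stream its
// output so the submit log (and the YARN application id it contains) can be read.
public class FlinkSubmitSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical command; the platform assembles the real one from the configured
        // Flink client directory, the job parameters, and the generated -sql file.
        String command = "/usr/local/flink-1.13.1/bin/flink run -m yarn-cluster"
                + " -c com.flink.streaming.core.JobApplication"
                + " lib/flink-streaming-core-1.5.0.RELEASE.jar"
                + " -sql sql/job_sql_1.sql -type 0";

        Process pcs = Runtime.getRuntime().exec(command);
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(pcs.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);   // the YARN application id shows up in this output
            }
        }
        System.out.println("flink client exited with code " + pcs.waitFor());
    }
}
```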

33
docs/img.md Normal file
View File

@ -0,0 +1,33 @@
~~~~
If the images do not load, try adding these hosts entries:
# GitHub Start
151.101.184.133 assets-cdn.github.com
151.101.184.133 raw.githubusercontent.com
151.101.184.133 gist.githubusercontent.com
151.101.184.133 cloud.githubusercontent.com
151.101.184.133 camo.githubusercontent.com
151.101.184.133 avatars.githubusercontent.com
151.101.184.133 avatars0.githubusercontent.com
151.101.184.133 avatars1.githubusercontent.com
151.101.184.133 avatars2.githubusercontent.com
151.101.184.133 avatars3.githubusercontent.com
151.101.184.133 avatars4.githubusercontent.com
151.101.184.133 avatars5.githubusercontent.com
151.101.184.133 avatars6.githubusercontent.com
151.101.184.133 avatars7.githubusercontent.com
151.101.184.133 avatars8.githubusercontent.com
151.101.184.133 avatars9.githubusercontent.com
151.101.184.133 avatars10.githubusercontent.com
~~~~
![图片](img_preview/1.png)
![图片](img_preview/2.png)
![图片](img_preview/6.png)
![图片](img_preview/3.png)
![图片](img_preview/4.png)
![图片](img_preview/5.png)
![图片](img_preview/7.png)

BIN
docs/img/dd.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 497 KiB

BIN
docs/img/me.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 712 KiB

BIN
docs/img/wx.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 394 KiB

BIN
docs/img/zfb.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 175 KiB

7
docs/img2.md Normal file
View File

@ -0,0 +1,7 @@
## Survey conducted over 2021-03-18 ~ 2021-03-19 (52 responses)
![图片](http://img.ccblog.cn/flink/dy-3.jpg)
![图片](http://img.ccblog.cn/flink/dy-1.jpg)
![图片](http://img.ccblog.cn/flink/dy-2.jpg)
![图片](http://img.ccblog.cn/flink/dy-4.jpg)

BIN
docs/img_preview/1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 384 KiB

BIN
docs/img_preview/2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 387 KiB

BIN
docs/img_preview/3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 418 KiB

BIN
docs/img_preview/4.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 394 KiB

BIN
docs/img_preview/5.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 272 KiB

BIN
docs/img_preview/6.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 403 KiB

BIN
docs/img_preview/7.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 188 KiB

152
docs/manual-batch.md Normal file
View File

@ -0,0 +1,152 @@
### 1. New job configuration
a: Job name (*required)
~~~~
The job name must be at most 50 characters and may only contain digits, letters, and underscores.
~~~~
b: Run mode
YARN_PER (per-job YARN mode: https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/ops/deployment/yarn_setup.html#run-a-single-flink-job-on-yarn)
STANDALONE (standalone cluster: https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/ops/deployment/cluster_setup.html)
LOCAL (local cluster: https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/ops/deployment/local.html)
<font color=red size=5>LOCAL mode requires a single-node Flink cluster started locally with ./bin/start-cluster.sh </font>
c: Flink run configuration
<font color=red size=5>1. YARN_PER mode </font>
~~~~
Parameters match the official CLI, but only -yD -p -yjm -yn -ytm -ys -yqu are supported (-yqu is required).
-ys   number of slots
-yn   number of task managers
-yjm  job manager heap size
-ytm  task manager heap size
-yqu  YARN queue name
-p    parallelism
-yD   e.g. -yD taskmanager.heap.mb=518
See the official documentation for details.
Example: -yqu flink -yjm 1024m -ytm 2048m -p 1 -ys 1
~~~~
<font color=red size=5>2. LOCAL mode </font>
~~~~
No configuration needed.
~~~~
<font color=red size=5>3. STANDALONE mode </font>
~~~~
-d,--detached If present, runs the job in detached
mode
-p,--parallelism <parallelism> The parallelism with which to run the
program. Optional flag to override the
default value specified in the
configuration.
-s,--fromSavepoint <savepointPath> Path to a savepoint to restore the job
from (for example
hdfs:///flink/savepoint-1537).
Other run options can be listed with flink -h.
~~~~
d: Third-party JAR URLs
~~~~
Enter the URLs of connector or UDF JARs,
e.g.:
http://ccblog.cn/jars/flink-connector-jdbc_2.11-1.12.0.jar
http://ccblog.cn/jars/flink-sql-connector-kafka_2.11-1.12.0.jar
http://ccblog.cn/jars/flink-streaming-udf.jar
http://ccblog.cn/jars/mysql-connector-java-5.1.25.jar
Once the URLs are set, UDFs can be referenced directly in SQL:
CREATE FUNCTION jsonHasKey as 'com.xx.udf.JsonHasKeyUDF';
~~~~
![screenshot](http://img.ccblog.cn/flink/9.png)
Separate multiple URLs with line breaks.
### Alternatively, if you use the built-in JAR management feature, just enter the JAR file names:
flink-connector-jdbc_2.11-1.12.0.jar
flink-streaming-udf.jar
mysql-connector-java-5.1.25.jar
For a UDF development demo, see [https://github.com/zhp8341/flink-streaming-udf](https://github.com/zhp8341/flink-streaming-udf)
e: SQL statements
#### Create the test table in the Hive test database beforehand:
~~~~
create table test(
id int,
name string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
~~~~
~~~~sql
CREATE CATALOG testmyhive WITH (
'type' = 'hive',
'default-database' = 'test',
'hive-conf-dir' = '/alidata/server/zhp/catalog/config'
);
USE CATALOG testmyhive;
insert into test.test values(4,'n2');
~~~~
### 2. System settings
~~~~
Three of the system settings are required:
1. flink-streaming-platform-web installation directory (required)
This is the directory where the application is installed,
e.g. /root/flink-streaming-platform-web/
2. Flink installation directory (required)
-- the Flink client directory, e.g. /usr/local/flink-1.12.0/
3. YARN ResourceManager HTTP address
-- the Hadoop YARN ResourceManager HTTP address, e.g. http://hadoop003:8088/
4. flink_rest_http_address
The Flink HTTP address, used in LOCAL mode
5. flink_rest_ha_http_address
For STANDALONE mode with HA; multiple addresses can be entered, separated by ;
~~~~
![图片](http://img.ccblog.cn/flink/5.png)

48
docs/manual-jar.md Normal file
View File

@ -0,0 +1,48 @@
## Result
![screenshot](http://img.ccblog.cn/flink/1-3.png)
![screenshot](http://img.ccblog.cn/flink/1-2.png)
## Notes
### Important: a home-grown streaming job JAR must first be hosted on an HTTP server (set one up yourself).
### Alternatively, if you use the built-in JAR management feature, just enter the JAR file names:
flink-connector-jdbc_2.11-1.12.0.jar
flink-streaming-udf.jar
mysql-connector-java-5.1.25.jar
### Main class name*
org.apache.flink.streaming.examples.socket.SocketWindowWordCount
### HTTP URL of the main-class JAR*
http://192.168.1.100/jar/SocketWindowWordCount.jar
The JAR must be uploaded to an internal HTTP server in advance.
### Custom parameters (main-class arguments):
Arguments required by the main method; user-defined and optional, depending on the main class you developed, e.g.:
--port 9999 --hostname 192.168.1.100

267
docs/manual-sql.md Normal file
View File

@ -0,0 +1,267 @@
### 1. New job configuration
a: Job name (*required)
~~~~
The job name must be at most 50 characters and may only contain digits, letters, and underscores.
~~~~
b: Run mode
YARN_PER (per-job YARN mode: https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/ops/deployment/yarn_setup.html#run-a-single-flink-job-on-yarn)
STANDALONE (standalone cluster: https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/ops/deployment/cluster_setup.html)
LOCAL (local cluster: https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/ops/deployment/local.html)
<font color=red size=5>LOCAL mode requires a single-node Flink cluster started locally with ./bin/start-cluster.sh </font>
c: Flink run configuration
<font color=red size=5>1. YARN_PER mode </font>
~~~~
Parameters match the official CLI, but only -yD -p -yjm -yn -ytm -ys -yqu are supported (-yqu is required).
-ys   number of slots
-yn   number of task managers
-yjm  job manager heap size
-ytm  task manager heap size
-yqu  YARN queue name
-p    parallelism
-yD   e.g. -yD taskmanager.heap.mb=518
See the official documentation for details.
Example: -yqu flink -yjm 1024m -ytm 2048m -p 1 -ys 1
~~~~
<font color=red size=5>2. LOCAL mode </font>
~~~~
No configuration needed.
~~~~
<font color=red size=5>3. STANDALONE mode </font>
~~~~
-d,--detached If present, runs the job in detached
mode
-p,--parallelism <parallelism> The parallelism with which to run the
program. Optional flag to override the
default value specified in the
configuration.
-s,--fromSavepoint <savepointPath> Path to a savepoint to restore the job
from (for example
hdfs:///flink/savepoint-1537).
Other run options can be listed with flink -h.
~~~~
d: Checkpoint settings
~~~~
If left empty, checkpointing is not enabled. Only the following parameters are supported:
-checkpointInterval
-checkpointingMode
-checkpointTimeout
-checkpointDir
-tolerableCheckpointFailureNumber
-asynchronousSnapshots
-externalizedCheckpointCleanup
Example: -asynchronousSnapshots true -checkpointDir hdfs://hcluster/flink/checkpoints/
(mind the permissions on the checkpoint directory)
~~~~
The table below describes each parameter; a sketch of how they map onto Flink's checkpoint API follows the table.
| Parameter | Value | Description |
| -------- | -----: | :----: |
| checkpointInterval | integer (e.g. 1000) | checkpoint interval in milliseconds; by default a checkpoint every 60 s |
| checkpointingMode | EXACTLY_ONCE or AT_LEAST_ONCE | consistency mode, default EXACTLY_ONCE |
| checkpointTimeout | 6000 | checkpoint timeout in milliseconds, default 10 minutes |
| checkpointDir | | checkpoint directory, e.g. hdfs://hcluster/flink/checkpoints/ (mind directory permissions) |
| tolerableCheckpointFailureNumber | 1 | tolerated checkpoint failures, default 1 |
| asynchronousSnapshots | true or false | whether snapshots are taken asynchronously |
| externalizedCheckpointCleanup | DELETE_ON_CANCELLATION or RETAIN_ON_CANCELLATION | whether checkpoints are deleted when the job is cancelled (optional) |
| stateBackendType | 0, 1, or 2 | state backend, default 1; 0: MemoryStateBackend, 1: FsStateBackend, 2: RocksDBStateBackend |
| enableIncremental | true or false | whether to use incremental checkpoints; only effective with stateBackendType 2 (RocksDBStateBackend) |
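For orientation only, the sketch below shows roughly how these flags correspond to Flink's DataStream checkpoint API (Flink 1.12-era class names are assumed; the platform parses the flags and applies them for you, so this is purely illustrative):

```java
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

// Illustrative mapping of the checkpoint flags above onto Flink's API.
public class CheckpointFlagsSketch {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.enableCheckpointing(60_000);                           // -checkpointInterval 60000
        CheckpointConfig cc = env.getCheckpointConfig();
        cc.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);   // -checkpointingMode
        cc.setCheckpointTimeout(600_000);                          // -checkpointTimeout
        cc.setTolerableCheckpointFailureNumber(1);                 // -tolerableCheckpointFailureNumber
        cc.enableExternalizedCheckpoints(                          // -externalizedCheckpointCleanup
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // -stateBackendType 1 (FsStateBackend) with -checkpointDir and -asynchronousSnapshots true
        env.setStateBackend(new FsStateBackend("hdfs://hcluster/flink/checkpoints/", true));
    }
}
```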
**RocksDB tuning parameters**
https://ci.apache.org/projects/flink/flink-docs-release-1.12/deployment/config.html#advanced-rocksdb-state-backends-options
**The corresponding source-level options live in the RocksDBConfigurableOptions Java class.**
e: Third-party JAR URLs
~~~~
Enter the URLs of connector or UDF JARs,
e.g.:
http://ccblog.cn/jars/flink-connector-jdbc_2.11-1.12.0.jar
http://ccblog.cn/jars/flink-sql-connector-kafka_2.11-1.12.0.jar
http://ccblog.cn/jars/flink-streaming-udf.jar
http://ccblog.cn/jars/mysql-connector-java-5.1.25.jar
Once the URLs are set, UDFs can be referenced directly in SQL:
CREATE FUNCTION jsonHasKey as 'com.xx.udf.JsonHasKeyUDF';
~~~~
![screenshot](http://img.ccblog.cn/flink/9.png)
Separate multiple URLs with line breaks.
For a UDF development demo, see [https://github.com/zhp8341/flink-streaming-udf](https://github.com/zhp8341/flink-streaming-udf); a minimal sketch of such a UDF follows below.
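For illustration only, a UDF such as the `jsonHasKey` example above might look roughly like the sketch below (the real implementation lives in the flink-streaming-udf repository linked above; the class/package name and the use of fastjson are assumptions made for this sketch):

```java
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.table.functions.ScalarFunction;

// Hypothetical sketch of a scalar UDF: returns true when the given JSON string
// contains the given top-level key.
public class JsonHasKeyUDF extends ScalarFunction {
    public Boolean eval(String json, String key) {
        if (json == null || key == null) {
            return false;
        }
        try {
            JSONObject obj = JSONObject.parseObject(json);
            return obj != null && obj.containsKey(key);
        } catch (Exception e) {
            return false;   // treat malformed JSON as "key not present"
        }
    }
}
```

Packaged as a shaded JAR and referenced via a third-party URL or JAR name as described above, it can then be registered with `CREATE FUNCTION jsonHasKey as 'com.xx.udf.JsonHasKeyUDF';`.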
f: SQL statements
![图片](http://img.ccblog.cn/flink/10.png)
![图片](http://img.ccblog.cn/flink/12.png)
**Note: select the relevant code first, then click the "Format code" button. Tip: CTRL+A selects all on Windows, Command+A on macOS.**
**Note: validation only checks the syntax of individual SQL statements. It cannot check relationships between statements or catalog semantics (whether a table exists, whether data types are correct, and so on), so it cannot guarantee the SQL will run without errors; it only catches some syntax mistakes.**
[Catalog support](https://github.com/zhp8341/flink-streaming-platform-web/tree/master/docs/catalog.md)
### 2. System settings
~~~~
Three of the system settings are required:
1. flink-streaming-platform-web installation directory (required)
This is the directory where the application is installed,
e.g. /root/flink-streaming-platform-web/
2. Flink installation directory (required)
-- the Flink client directory, e.g. /usr/local/flink-1.12.0/
3. YARN ResourceManager HTTP address
-- the Hadoop YARN ResourceManager HTTP address, e.g. http://hadoop003:8088/
4. flink_rest_http_address
The Flink HTTP address, used in LOCAL mode
5. flink_rest_ha_http_address
For STANDALONE mode with HA; multiple addresses can be entered, separated by ;
~~~~
![图片](http://img.ccblog.cn/flink/5.png)
### 3. Alert settings
#### a: DingTalk alert configuration
~~~~
Alerts are sent when a running job dies.
DingTalk alert setup documentation: https://help.aliyun.com/knowledge_detail/106247.html
~~~~
In the DingTalk security settings, the keyword must be set to: <font color=red size=5> 告警 </font>
![图片](https://img-blog.csdnimg.cn/20201018110534482.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3pocDgzNDE=,size_16,color_FFFFFF,t_70#pic_center)
![图片](https://img-blog.csdnimg.cn/20201018112359232.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3pocDgzNDE=,size_16,color_FFFFFF,t_70#pic_center)
Result:
![图片](https://img-blog.csdnimg.cn/20201018111816869.png#pic_center)
#### b: Custom callback alerts
Custom callback alerts let users implement their own alerting channel (SMS, e-mail, WeChat, and so on) against a simple HTTP interface.
**Interface requirements**
**URL: http://{domain}/alarmCallback — the path must be alarmCallback**
**Both POST and GET are supported.**
| Request parameter | Description |
| -------- | :----: |
| appId | the job's application id |
| jobName | job name |
| deployMode | deployment mode |
For reference, see the following code; a hand-rolled way to exercise the callback is sketched after it.
https://github.com/zhp8341/flink-streaming-platform-web/blob/master/flink-streaming-web/src/main/java/com/flink/streaming/web/controller/api/ApiController.java
~~~~
@RequestMapping("/alarmCallback")
public RestResult alarmCallback(String appId, String jobName, String deployMode) {
log.info("测试回调 appId={} jobName={} deployMode={}", appId, jobName, deployMode);
//业务逻辑
return RestResult.success();
}
~~~~
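As a quick, hand-rolled way to exercise such a callback (illustrative only: the host, port, and parameter values below are hypothetical; point the URL at wherever your callback service is actually deployed), you can send the same kind of request the platform would:

```java
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;

// Illustrative smoke test for a custom alarm callback endpoint.
public class AlarmCallbackSmokeTest {
    public static void main(String[] args) throws Exception {
        String query = "appId=" + URLEncoder.encode("application_1234_0001", "UTF-8")
                + "&jobName=" + URLEncoder.encode("demo_job", "UTF-8")
                + "&deployMode=" + URLEncoder.encode("YARN_PER", "UTF-8");
        URL url = new URL("http://localhost:9084/alarmCallback?" + query);

        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");   // the interface accepts GET as well as POST
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line);   // expect a success RestResult JSON body
            }
        }
    }
}
```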
#### c: Automatic job restart
If automatic restart is configured and a job on the cluster is detected as dead, the job is restarted.
**Configuration screenshot**
![图片](http://img.ccblog.cn/flink/13.png)
### Use the SQL below to smoke-test your environment: a hello-world job that gives new users a feel for the platform
```sql
CREATE TABLE source_table (
f0 INT,
f1 INT,
f2 STRING
) WITH (
'connector' = 'datagen',
'rows-per-second'='5'
);
CREATE TABLE print_table (
f0 INT,
f1 INT,
f2 STRING
) WITH (
'connector' = 'print'
);
insert into print_table select f0,f1,f2 from source_table;
```
```diff
+ Note: if you have the development capacity, hook the error logs into your own log alerting system.
```

110
docs/question.md Normal file
View File

@ -0,0 +1,110 @@
1.
```java
Setting HADOOP_CONF_DIR=/etc/hadoop/conf because no HADOOP_CONF_DIR was set.
Could not build the program from JAR file.
Use the help option (-h or --help) to get help on the command.
Solution:
export HADOOP_HOME=/etc/hadoop
export HADOOP_CONF_DIR=/etc/hadoop/conf
export HADOOP_CLASSPATH=`hadoop classpath`
source /etc/profile
It is best to make these global environment variables.
```
2.
```java
2020-10-02 14:48:22,060 ERROR com.flink.streaming.core.JobApplication - 任务执行失败:
java.lang.IllegalStateException: Unable to instantiate java compiler
at org.apache.calcite.rel.metadata.JaninoRelMetadataProvider.compile(JaninoRelMetadataProvider.java:434)
at org.apache.calcite.rel.metadata.JaninoRelMetadataProvider.load3(JaninoRelMetadataProvider.java:375)
at org.apache.calcite.rel.metadata.JaninoRelMetadataProvider.lambda$static$0(JaninoRelMetadataProvider.java:109)
at org.apache.flink.calcite.shaded.com.google.common.cache.CacheLoader$FunctionToCacheLoader.load(CacheLoader.java:149)
at org.apache.flink.calcite.shaded.com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3542)
at org.apache.flink.calcite.shaded.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2323)
at org.apache.flink.calcite.shaded.com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2286)
at org.apache.flink.calcite.shaded.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2201)
at org.apache.flink.calcite.shaded.com.google.common.cache.LocalCache.get(LocalCache.java:3953)
at org.apache.flink.calcite.shaded.com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:3957)
at org.apache.flink.calcite.shaded.com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4875)
at org.apache.calcite.rel.metadata.JaninoRelMetadataProvider.create(JaninoRelMetadataProvider.java:475)
at org.apache.calcite.rel.metadata.JaninoRelMetadataProvider.revise(JaninoRelMetadataProvider.java:488)
at org.apache.calcite.rel.metadata.RelMetadataQuery.revise(RelMetadataQuery.java:193)
at org.apache.calcite.rel.metadata.RelMetadataQuery.getPulledUpPredicates(RelMetadataQuery.java:797)
at org.apache.calcite.rel.rules.ReduceExpressionsRule$ProjectReduceExpressionsRule.onMatch(ReduceExpressionsRule.java:298)
at org.apache.calcite.plan.AbstractRelOptPlanner.fireRule(AbstractRelOptPlanner.java:319)
at org.apache.calcite.plan.hep.HepPlanner.applyRule(HepPlanner.java:560)
at org.apache.calcite.plan.hep.HepPlanner.applyRules(HepPlanner.java:419)
at org.apache.calcite.plan.hep.HepPlanner.executeInstruction(HepPlanner.java:256)
at org.apache.calcite.plan.hep.HepInstruction$RuleInstance.execute(HepInstruction.java:127)
at org.apache.calcite.plan.hep.HepPlanner.executeProgram(HepPlanner.java:215)
at org.apache.calcite.plan.hep.HepPlanner.findBestExp(HepPlanner.java:202)
at org.apache.flink.table.planner.plan.optimize.program.FlinkHepProgram.optimize(FlinkHepProgram.scala:69)
at org.apache.flink.table.planner.plan.optimize.program.FlinkHepRuleSetProgram.optimize(FlinkHepRuleSetProgram.scala:87)
at org.apache.flink.table.planner.plan.optimize.program.FlinkChainedProgram$$anonfun$optimize$1.apply(FlinkChainedProgram.scala:62)
at org.apache.flink.table.planner.plan.optimize.program.FlinkChainedProgram$$anonfun$optimize$1.apply(FlinkChainedProgram.scala:58)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
at scala.collection.AbstractTraversable.foldLeft(Traversable.scala:104)
at org.apache.flink.table.planner.plan.optimize.program.FlinkChainedProgram.optimize(FlinkChainedProgram.scala:57)
at org.apache.flink.table.planner.plan.optimize.StreamCommonSubGraphBasedOptimizer.optimizeTree(StreamCommonSubGraphBasedOptimizer.scala:170)
at org.apache.flink.table.planner.plan.optimize.StreamCommonSubGraphBasedOptimizer.doOptimize(StreamCommonSubGraphBasedOptimizer.scala:90)
at org.apache.flink.table.planner.plan.optimize.CommonSubGraphBasedOptimizer.optimize(CommonSubGraphBasedOptimizer.scala:77)
at org.apache.flink.table.planner.delegation.PlannerBase.optimize(PlannerBase.scala:248)
at org.apache.flink.table.planner.delegation.PlannerBase.translate(PlannerBase.scala:151)
at org.apache.flink.table.api.internal.TableEnvironmentImpl.translate(TableEnvironmentImpl.java:682)
at org.apache.flink.table.api.internal.TableEnvironmentImpl.sqlUpdate(TableEnvironmentImpl.java:495)
at com.flink.streaming.core.JobApplication.callDml(JobApplication.java:138)
at com.flink.streaming.core.JobApplication.main(JobApplication.java:85)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1754)
at org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
Caused by: java.lang.ClassCastException: org.codehaus.janino.CompilerFactory cannot be cast to org.codehaus.commons.compiler.ICompilerFactory
at org.codehaus.commons.compiler.CompilerFactoryFactory.getCompilerFactory(CompilerFactoryFactory.java:129)
at org.codehaus.commons.compiler.CompilerFactoryFactory.getDefaultCompilerFactory(CompilerFactoryFactory.java:79)
at org.apache.calcite.rel.metadata.JaninoRelMetadataProvider.compile(JaninoRelMetadataProvider.java:432)
... 60 more
conf/flink-conf.yaml
```
**Fix: set classloader.resolve-order: parent-first in the configuration.**
**Main log locations**
1. Web application logs:
/{install directory}/flink-streaming-platform-web/logs/
2. Flink client logs:
${FLINK_HOME}/log/flink-${USER}-client-.log

56
docs/service.md Normal file
View File

@ -0,0 +1,56 @@
## Paid services
**flink-streaming-platform-web is an open-source project and will remain open source, with periodic updates. The paid services described here cover technical support and custom development for companies; the project itself stays free and open source.
Offering reasonable paid services to specific users and companies, without affecting normal use of the open-source project by the vast majority of users, helps the project survive and grow more sustainably.
If you see value in these services, we welcome a mutually beneficial arrangement.**
## Questions asked in the community group are still answered free of charge, as before
## Service offerings
### 1. One-on-one project training, walkthroughs, and Q&A
* **[1] Audience: beginners**
* **[2] Scope: the flink-streaming-platform-web project**
* **[3] Content: how flink-streaming-platform-web works, hands-on usage, basic theory, and related Q&A**
* **[4] Format: remote video / voice / screen sharing**
* **[5] Price: 200 CNY/hour (the first hour is billed in full even if shorter); each additional half hour is 100 CNY (billed in half-hour increments)**
### 2. Technical support
* **[1] Audience: enterprise users**
* **[2] Scope: flink-streaming-platform-web**
* **[3] Content: consulting on issues met in real deployments, optimization advice, and deployment help**
* **[4] Format: remote video / voice / screen sharing**
* **[5] Price: 500 CNY/hour (billed in whole hours)**
### 3. Custom development
* **[1] Audience: enterprise users**
* **[2] Scope: flink-streaming-platform-web**
* **[3] Content: customized work, as agreed in discussion**
* **[4] Format: remote development**
* **[5] Price: 1000 CNY per person-day, with effort estimated per project (payment split 3-6-1: 30% deposit, 60% on delivery, 10% on go-live)**
### Past engagements
Date | Duration | Content
--- | :--- | :---
2022-09-04 | 1 hour | Platform introduction and discussion of the client's business requirements
2022-11-04 | 2.5 hours | Platform introduction and big-data fundamentals
2022-11-24 | 2.5 hours | Platform introduction and environment setup (Flink, Hadoop, etc.)
2023-11-14 | 1.5 hours | Platform introduction, environment setup, and related big-data topics
### Contact
* E-mail: zhuhuipei@163.com
* WeChat: zhp8341 (mention "paid service" when adding)
* Personal DingTalk
* ![Personal DingTalk](./img/me.jpg)
DingTalk
[DingTalk QR code](http://img.ccblog.cn/flink/dd2.png)
http://img.ccblog.cn/flink/dd2.png

95
docs/source.md Normal file
View File

@ -0,0 +1,95 @@
## Overall design
### 1. Design diagram
![diagram](http://img.ccblog.cn/flink/flink-streaming-platform-web.jpg)
### 2. Module overview
1. deployer:
1. Project packaging
2. Operations scripts: start, stop, restart
3. Web configuration file application.properties
2. docs:
1. Database initialization SQL
2. SQL samples
3. Usage documentation, etc.
3. flink-streaming-common: shared classes for Flink streaming
4. flink-streaming-core: the core Flink streaming module
1. Job submission, configuration, etc.
5. flink-streaming-validation: SQL validation module
6. flink-streaming-web: the web platform module
1.
2. User management
3. Log management
4. System configuration, etc.
7. flink-streaming-web-alarm: alerting interface of the web platform
8. flink-streaming-web-common: shared classes for the web platform
1. Bean classes
2. Utility classes
3. Enums, etc.
9. flink-streaming-web-config: configuration classes for the web platform
1. Configuration of the alert, job, savepoint, and wait queues
### 3. Detailed design and flow of each module
1. Code fragments of the job submission and start flow.
```java
/**
* Controller layer.
* Loads the job details,
* loads the job's alert configuration,
* determines the run mode and starts the job via the matching implementation of the JobServerAO interface.
*/
JobConfigApiController.start(Long id,Long savepointId);
```
```java
/**
* Main job-submission flow
*/
JobYarnServerAOImpl.start(Long id,Long savepointId,String userName){
//1. Validate the jobConfigDTO status and other parameters
jobBaseServiceAO.checkStart(jobConfigDTO);
//2. Write the configured SQL to a local file and return the parameters needed to run
JobRunParamDTO jobRunParamDTO=jobBaseServiceAO.writeSqlToFile(jobConfigDTO);
//3. Insert a job-run log record
Long jobRunLogId=jobBaseServiceAO.insertJobRunLog(jobConfigDTO,userName);
//4、将任务状态变更为启动中(乐观锁防止重复提交)
jobConfigService.updateStatusByStart(jobConfigDTO.getId(),userName,jobRunLogId,jobConfigDTO.getVersion());
String savepointPath=savepointBackupService.getSavepointPathById(id,savepointId);
//异步提交任务
jobBaseServiceAO.aSyncExecJob(jobRunParamDTO,jobConfigDTO,jobRunLogId,savepointPath);
}
```
```java
/**
* 异步提交任务
*/
JobBaseServiceAOImpl.aSyncExecJob(JobRunParamDTO jobRunParamDTO, JobConfigDTO jobConfigDTO, Long jobRunLogId, String savepointPath){
   //根据部署模式分支,以yarn-per-job为例
   switch (deployMode) {
     case YARN_PER:
       //1、构建执行命令
       command = CommandUtil.buildRunCommandForYarnCluster(jobRunParamDTO, jobConfigDTO, savepointPath);
       //2、提交任务
       appId = this.submitJobForYarn(command, jobConfigDTO, localLog);
       break;
   }
   //提交完成后更新状态
   this.updateStatusAndLog(jobConfigDTO, jobRunLogId, jobStatus, localLog.toString(), appId);
}
```

View File

@ -0,0 +1,12 @@
-- 老版本升级需要的sql
ALTER TABLE job_config ADD COLUMN job_desc VARCHAR(255) NULL COMMENT '任务描述' AFTER job_name;
ALTER TABLE job_config_history ADD COLUMN job_desc VARCHAR(255) NULL COMMENT '任务描述' AFTER job_name;
ALTER TABLE job_config_history add `job_type` tinyint(1) NOT NULL DEFAULT '0' COMMENT '任务类型 0:sql 1:自定义jar' AFTER version ;
ALTER TABLE job_run_log ADD COLUMN job_desc VARCHAR(255) NULL COMMENT '任务描述' AFTER job_name;
ALTER TABLE user ADD COLUMN `name` VARCHAR(100) NOT NULL COMMENT '用户姓名' AFTER `username`;
ALTER TABLE `user` ADD COLUMN `status` tinyint(1) NOT NULL COMMENT '1:启用 0: 停用' AFTER stauts; -- 修正status字段命名兼容处理只增加字段
ALTER TABLE `user` modify COLUMN `stauts` tinyint(1) NOT NULL DEFAULT 0 COMMENT '1:启用 0: 停用';
ALTER TABLE `user` modify COLUMN `username` varchar(100) COLLATE utf8mb4_bin NOT NULL COMMENT '用户帐号';
update user set name='系统管理员' where id=1;
update user set status=1 where id=1;
update job_config_history a, job_config b set a.job_type=b.job_type where a.job_config_id=b.id;
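-- 补充示意:上面的 ALTER 重复执行会因列已存在而报错,
-- 升级前可以先在 information_schema 里确认目标列是否已存在(以 job_config.job_desc 为例,库名按实际为准)
SELECT COLUMN_NAME
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = 'flink_web'
  AND TABLE_NAME = 'job_config'
  AND COLUMN_NAME = 'job_desc';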

225
docs/sql/flink_web.sql Normal file
View File

@ -0,0 +1,225 @@
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
CREATE DATABASE `flink_web` DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
USE flink_web;
-- ----------------------------
-- Table structure for alart_log
-- ----------------------------
DROP TABLE IF EXISTS `alart_log`;
CREATE TABLE `alart_log` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_config_id` bigint(11) NOT NULL COMMENT 'job_config的id 如果0代表的是测试,',
`job_name` varchar(255) DEFAULT NULL,
`message` varchar(512) DEFAULT NULL COMMENT '消息内容',
`type` tinyint(1) NOT NULL COMMENT '1:钉钉',
`status` tinyint(1) NOT NULL COMMENT '1:成功 0:失败',
`fail_log` text COMMENT '失败原因',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`),
KEY `index_job_config_id` (`job_config_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='告警发送情况日志';
-- ----------------------------
-- Table structure for ip_status
-- ----------------------------
DROP TABLE IF EXISTS `ip_status`;
CREATE TABLE `ip_status` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`ip` varchar(64) NOT NULL COMMENT 'ip',
`status` int(11) NOT NULL COMMENT '1:运行 -1:停止 ',
`last_time` datetime DEFAULT NULL COMMENT '最后心跳时间',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) NOT NULL DEFAULT 'sys',
`editor` varchar(32) NOT NULL DEFAULT 'sys',
PRIMARY KEY (`id`),
UNIQUE KEY `index_uk_ip` (`ip`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='web服务运行ip';
-- ----------------------------
-- Table structure for job_config
-- ----------------------------
DROP TABLE IF EXISTS `job_config`;
CREATE TABLE `job_config` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_name` varchar(64) NOT NULL COMMENT '任务名称',
`deploy_mode` varchar(64) NOT NULL COMMENT '提交模式: standalone 、yarn 、yarn-session ',
`flink_run_config` varchar(512) NOT NULL COMMENT 'flink运行配置',
`flink_sql` MEDIUMTEXT NOT NULL COMMENT 'sql语句',
`flink_checkpoint_config` varchar(512) DEFAULT NULL COMMENT 'checkPoint配置',
`job_id` varchar(64) DEFAULT NULL COMMENT '运行后的任务id',
`is_open` tinyint(1) NOT NULL COMMENT '1:开启 0: 关闭',
`status` tinyint(1) NOT NULL COMMENT '1:运行中 0: 停止中 -1:运行失败',
`ext_jar_path` varchar(2048) DEFAULT NULL COMMENT 'udf地址已经连接器jar 如http://xxx.xxx.com/flink-streaming-udf.jar',
`last_start_time` datetime DEFAULT NULL COMMENT '最后一次启动时间',
`last_run_log_id` bigint(11) DEFAULT NULL COMMENT '最后一次日志',
`version` int(11) NOT NULL DEFAULT '0' COMMENT '更新版本号 用于乐观锁',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`),
KEY `uk_index` (`job_name`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='flink任务配置表';
ALTER TABLE job_config add `job_type` tinyint(1) NOT NULL DEFAULT '0' COMMENT '任务类型 0:sql 1:自定义jar' AFTER version ;
ALTER TABLE job_config add `custom_args` varchar(128) DEFAULT NULL COMMENT '启动jar可能需要使用的自定义参数' AFTER job_type;
ALTER TABLE job_config add `custom_main_class` varchar(128) DEFAULT NULL COMMENT '程序入口类' AFTER custom_args;
ALTER TABLE job_config add `custom_jar_url` varchar(128) DEFAULT NULL COMMENT'自定义jar的http地址 如:http://ccblog.cn/xx.jar' AFTER custom_main_class;
ALTER TABLE job_config ADD COLUMN job_desc VARCHAR(255) NULL COMMENT '任务描述' AFTER job_name;
ALTER TABLE job_config ADD COLUMN cron VARCHAR(128) NULL COMMENT '批任务定时调度 如 0/20 * * * * ? 表示每20秒 执行任务 ' AFTER status;
-- ----------------------------
-- Table structure for job_config_history
-- ----------------------------
CREATE TABLE `job_config_history` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_config_id` bigint(11) NOT NULL COMMENT 'job_config主表Id',
`job_name` varchar(64) NOT NULL COMMENT '任务名称',
`deploy_mode` varchar(64) NOT NULL COMMENT '提交模式: standalone 、yarn 、yarn-session ',
`flink_run_config` varchar(512) NOT NULL COMMENT 'flink运行配置',
`flink_sql` mediumtext NOT NULL COMMENT 'sql语句',
`flink_checkpoint_config` varchar(512) DEFAULT NULL COMMENT 'checkPoint配置',
`ext_jar_path` varchar(2048) DEFAULT NULL COMMENT 'udf地址及连接器jar 如http://xxx.xxx.com/flink-streaming-udf.jar',
`version` int(11) NOT NULL DEFAULT '0' COMMENT '更新版本号',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`),
KEY `index_job_config_id` (`job_config_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='flink任务配置历史变更表';
ALTER TABLE job_config_history ADD COLUMN job_desc VARCHAR(255) NULL COMMENT '任务描述' AFTER job_name;
ALTER TABLE job_config_history add `job_type` tinyint(1) NOT NULL DEFAULT '0' COMMENT '任务类型 0:sql 1:自定义jar' AFTER version ;
ALTER TABLE job_config_history ADD COLUMN cron VARCHAR(128) NULL COMMENT '批任务定时调度 如 0/20 * * * * ? 表示每20秒 执行任务 ' AFTER version;
-- ----------------------------
-- Table structure for job_run_log
-- ----------------------------
DROP TABLE IF EXISTS `job_run_log`;
CREATE TABLE `job_run_log` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_config_id` bigint(11) NOT NULL,
`job_name` varchar(64) NOT NULL COMMENT '任务名称',
`deploy_mode` varchar(64) NOT NULL COMMENT '提交模式: standalone 、yarn 、yarn-session ',
`job_id` varchar(64) DEFAULT NULL COMMENT '运行后的任务id',
`local_log` mediumtext COMMENT '启动时本地日志',
`remote_log_url` varchar(128) DEFAULT NULL COMMENT '远程日志url的地址',
`start_time` datetime DEFAULT NULL COMMENT '启动时间',
`end_time` datetime DEFAULT NULL COMMENT '启动时间',
`job_status` varchar(32) DEFAULT NULL COMMENT '任务状态',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='运行任务日志';
ALTER TABLE job_run_log add `run_ip` varchar(64) DEFAULT NULL COMMENT '任务运行所在的机器' AFTER local_log ;
ALTER TABLE job_run_log ADD COLUMN job_desc VARCHAR(255) NULL COMMENT '任务描述' AFTER job_name;
-- ----------------------------
-- Table structure for savepoint_backup
-- ----------------------------
DROP TABLE IF EXISTS `savepoint_backup`;
CREATE TABLE `savepoint_backup` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_config_id` bigint(11) NOT NULL,
`savepoint_path` varchar(2048) NOT NULL COMMENT '地址',
`backup_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '备份时间',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`),
KEY `index` (`job_config_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='savepoint备份地址';
-- ----------------------------
-- Table structure for system_config
-- ----------------------------
DROP TABLE IF EXISTS `system_config`;
CREATE TABLE `system_config` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`key` varchar(128) NOT NULL COMMENT 'key值',
`val` varchar(512) NOT NULL COMMENT 'value',
`type` varchar(12) NOT NULL COMMENT '类型 如:sys alarm',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) NOT NULL DEFAULT 'sys',
`editor` varchar(32) NOT NULL DEFAULT 'sys',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='系统配置';
-- ----------------------------
-- Table structure for user
-- ----------------------------
DROP TABLE IF EXISTS `user`;
CREATE TABLE `user` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`username` varchar(100) COLLATE utf8mb4_bin NOT NULL COMMENT '用户帐号',
`password` varchar(255) COLLATE utf8mb4_bin DEFAULT NULL COMMENT '密码',
`status` tinyint(1) NOT NULL COMMENT '1:启用 0: 停用',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0' COMMENT '1:删除 0: 未删除',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) COLLATE utf8mb4_bin DEFAULT 'sys',
`editor` varchar(32) COLLATE utf8mb4_bin DEFAULT 'sys',
PRIMARY KEY (`id`),
UNIQUE KEY `index_uk` (`username`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
ALTER TABLE user ADD COLUMN `name` VARCHAR(100) NOT NULL COMMENT '用户姓名' AFTER `username`;
CREATE TABLE `job_alarm_config`
(
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_id` bigint(11) unsigned NOT NULL COMMENT 'job_config主表id',
`type` tinyint(1) NOT NULL COMMENT '类型 1:钉钉告警 2:url回调 3:异常自动拉起任务',
`version` int(11) NOT NULL DEFAULT '0' COMMENT '更新版本号 ',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`),
KEY `uk_index_job_id` (`job_id`) USING BTREE
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4 COMMENT ='告警辅助配置表';
-- ----------------------------
-- Records of user 默认密码是 123456
-- ----------------------------
BEGIN;
INSERT INTO `user` VALUES (1, 'admin', '系统管理员', 'e10adc3949ba59abbe56e057f20f883e', 1, 0, '2020-07-10 22:15:04', '2020-07-24 22:21:35', 'sys', 'sys');
COMMIT;
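-- 补充示意:默认管理员密码 123456 存的是 MD5 值,可用下面的查询核对;
-- 如需修改默认密码,可按同样方式生成新的 MD5 后 UPDATE `password` 字段(假设平台按相同的 MD5 方式校验)
SELECT MD5('123456');  -- 结果应为 e10adc3949ba59abbe56e057f20f883e,即上面 INSERT 中的密码列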
CREATE TABLE `upload_file` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`file_name` varchar(128) DEFAULT NULL COMMENT '文件名字',
`file_path` varchar(512) DEFAULT NULL COMMENT '文件路径',
`type` int(11) NOT NULL DEFAULT '1' COMMENT '1:jar',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COMMENT='上传文件管理';

View File

@ -0,0 +1,247 @@
CREATE DATABASE IF NOT EXISTS flink_web_docker default charset utf8 COLLATE utf8_general_ci;
use flink_web_docker;
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for alart_log
-- ----------------------------
DROP TABLE IF EXISTS `alart_log`;
CREATE TABLE `alart_log` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_config_id` bigint(11) NOT NULL COMMENT 'job_config的id 如果0代表的是测试,',
`job_name` varchar(255) DEFAULT NULL,
`message` varchar(512) DEFAULT NULL COMMENT '消息内容',
`type` tinyint(1) NOT NULL COMMENT '1:钉钉',
`status` tinyint(1) NOT NULL COMMENT '1:成功 0:失败',
`fail_log` text COMMENT '失败原因',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`),
KEY `index_job_config_id` (`job_config_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='告警发送情况日志';
-- ----------------------------
-- Table structure for ip_status
-- ----------------------------
DROP TABLE IF EXISTS `ip_status`;
CREATE TABLE `ip_status` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`ip` varchar(64) NOT NULL COMMENT 'ip',
`status` int(11) NOT NULL COMMENT '1:运行 -1:停止 ',
`last_time` datetime DEFAULT NULL COMMENT '最后心跳时间',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) NOT NULL DEFAULT 'sys',
`editor` varchar(32) NOT NULL DEFAULT 'sys',
PRIMARY KEY (`id`),
UNIQUE KEY `index_uk_ip` (`ip`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8mb4 COMMENT='web服务运行ip';
-- ----------------------------
-- Records of ip_status
-- ----------------------------
BEGIN;
INSERT INTO `ip_status` VALUES (1, '172.17.0.2', 1, '2021-09-24 23:46:00', 0, '2021-09-24 23:38:59', '2021-09-24 23:45:59', 'sys', 'sys');
COMMIT;
-- ----------------------------
-- Table structure for job_alarm_config
-- ----------------------------
DROP TABLE IF EXISTS `job_alarm_config`;
CREATE TABLE `job_alarm_config` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_id` bigint(11) unsigned NOT NULL COMMENT 'job_config主表id',
`type` tinyint(1) NOT NULL COMMENT '类型 1:钉钉告警 2:url回调 3:异常自动拉起任务',
`version` int(11) NOT NULL DEFAULT '0' COMMENT '更新版本号 ',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`),
KEY `uk_index_job_id` (`job_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='告警辅助配置表';
-- ----------------------------
-- Table structure for job_config
-- ----------------------------
DROP TABLE IF EXISTS `job_config`;
CREATE TABLE `job_config` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_name` varchar(64) NOT NULL COMMENT '任务名称',
`deploy_mode` varchar(64) NOT NULL COMMENT '提交模式: standalone 、yarn 、yarn-session ',
`flink_run_config` varchar(512) NOT NULL COMMENT 'flink运行配置',
`flink_sql` mediumtext NOT NULL COMMENT 'sql语句',
`flink_checkpoint_config` varchar(512) DEFAULT NULL COMMENT 'checkPoint配置',
`job_id` varchar(64) DEFAULT NULL COMMENT '运行后的任务id',
`is_open` tinyint(1) NOT NULL COMMENT '1:开启 0: 关闭',
`status` tinyint(1) NOT NULL COMMENT '1:运行中 0: 停止中 -1:运行失败',
`ext_jar_path` varchar(2048) DEFAULT NULL COMMENT 'udf地址已经连接器jar 如http://xxx.xxx.com/flink-streaming-udf.jar',
`last_start_time` datetime DEFAULT NULL COMMENT '最后一次启动时间',
`last_run_log_id` bigint(11) DEFAULT NULL COMMENT '最后一次日志',
`version` int(11) NOT NULL DEFAULT '0' COMMENT '更新版本号 用于乐观锁',
`job_type` tinyint(1) NOT NULL DEFAULT '0' COMMENT '任务类型 0:sql 1:自定义jar',
`custom_args` varchar(128) DEFAULT NULL COMMENT '启动jar可能需要使用的自定义参数',
`custom_main_class` varchar(128) DEFAULT NULL COMMENT '程序入口类',
`custom_jar_url` varchar(128) DEFAULT NULL COMMENT '自定义jar的http地址 如:http://ccblog.cn/xx.jar',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`),
KEY `uk_index` (`job_name`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8mb4 COMMENT='flink任务配置表';
ALTER TABLE job_config ADD COLUMN job_desc VARCHAR(255) NULL COMMENT '任务描述' AFTER job_name;
-- ----------------------------
-- Records of job_config
-- ----------------------------
BEGIN;
INSERT INTO `job_config` VALUES (1, 'test_datagen_simple', '任务表述-test_datagen_simple', 'LOCAL', '', 'CREATE TABLE source_table (\n f0 INT,\n f1 INT,\n f2 STRING\n) WITH (\n \'connector\' = \'datagen\',\n \'rows-per-second\'=\'5\'\n);\nCREATE TABLE print_table (\n f0 INT,\n f1 INT,\n f2 STRING\n) WITH (\n \'connector\' = \'print\'\n);\ninsert into print_table select f0,f1,f2 from source_table;\n', '', '', 1, 0, NULL, '2021-09-24 23:15:21', 227, 77, 0, NULL, NULL, NULL, 0, '2021-04-06 10:24:30', '2021-09-24 23:45:30', NULL, 'admin');
COMMIT;
-- ----------------------------
-- Table structure for job_config_history
-- ----------------------------
DROP TABLE IF EXISTS `job_config_history`;
CREATE TABLE `job_config_history` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_config_id` bigint(11) NOT NULL COMMENT 'job_config主表Id',
`job_name` varchar(64) NOT NULL COMMENT '任务名称',
`deploy_mode` varchar(64) NOT NULL COMMENT '提交模式: standalone 、yarn 、yarn-session ',
`flink_run_config` varchar(512) NOT NULL COMMENT 'flink运行配置',
`flink_sql` mediumtext NOT NULL COMMENT 'sql语句',
`flink_checkpoint_config` varchar(512) DEFAULT NULL COMMENT 'checkPoint配置',
`ext_jar_path` varchar(2048) DEFAULT NULL COMMENT 'udf地址及连接器jar 如http://xxx.xxx.com/flink-streaming-udf.jar',
`version` int(11) NOT NULL DEFAULT '0' COMMENT '更新版本号',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`),
KEY `index_job_config_id` (`job_config_id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8mb4 COMMENT='flink任务配置历史变更表';
ALTER TABLE job_config_history ADD COLUMN job_desc VARCHAR(255) NULL COMMENT '任务描述' AFTER job_name;
ALTER TABLE job_config_history add `job_type` tinyint(1) NOT NULL DEFAULT '0' COMMENT '任务类型 0:sql 1:自定义jar' AFTER version ;
-- ----------------------------
-- Table structure for job_run_log
-- ----------------------------
DROP TABLE IF EXISTS `job_run_log`;
CREATE TABLE `job_run_log` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_config_id` bigint(11) NOT NULL,
`job_name` varchar(64) NOT NULL COMMENT '任务名称',
`deploy_mode` varchar(64) NOT NULL COMMENT '提交模式: standalone 、yarn 、yarn-session ',
`job_id` varchar(64) DEFAULT NULL COMMENT '运行后的任务id',
`local_log` mediumtext COMMENT '启动时本地日志',
`run_ip` varchar(64) DEFAULT NULL COMMENT '任务运行所在的机器',
`remote_log_url` varchar(128) DEFAULT NULL COMMENT '远程日志url的地址',
`start_time` datetime DEFAULT NULL COMMENT '启动时间',
`end_time` datetime DEFAULT NULL COMMENT '启动时间',
`job_status` varchar(32) DEFAULT NULL COMMENT '任务状态',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='运行任务日志';
ALTER TABLE job_run_log ADD COLUMN job_desc VARCHAR(255) NULL COMMENT '任务描述' AFTER job_name;
-- ----------------------------
-- Table structure for savepoint_backup
-- ----------------------------
DROP TABLE IF EXISTS `savepoint_backup`;
CREATE TABLE `savepoint_backup` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`job_config_id` bigint(11) NOT NULL,
`savepoint_path` varchar(2048) NOT NULL COMMENT '地址',
`backup_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '备份时间',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`),
KEY `index` (`job_config_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='savepoint备份地址';
-- ----------------------------
-- Table structure for system_config
-- ----------------------------
DROP TABLE IF EXISTS `system_config`;
CREATE TABLE `system_config` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`key` varchar(128) NOT NULL COMMENT 'key值',
`val` varchar(512) NOT NULL COMMENT 'value',
`type` varchar(12) NOT NULL COMMENT '类型 如:sys alarm',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) NOT NULL DEFAULT 'sys',
`editor` varchar(32) NOT NULL DEFAULT 'sys',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8mb4 COMMENT='系统配置';
-- ----------------------------
-- Records of system_config
-- ----------------------------
BEGIN;
INSERT INTO `system_config` VALUES (1, 'flink_home', '/data/projects/flink/', 'SYS', 0, '2020-11-12 10:42:02', '2021-09-24 22:11:01', 'sys', 'sys');
INSERT INTO `system_config` VALUES (2, 'flink_rest_http_address', 'http://127.0.0.1:8081/', 'SYS', 0, '2020-11-04 11:23:49', '2020-12-16 20:32:33', 'sys', 'sys');
INSERT INTO `system_config` VALUES (3, 'flink_streaming_platform_web_home', '/data/projects/flink-streaming-platform-web/', 'SYS', 0, '2020-10-16 16:08:58', '2021-09-24 22:12:42', 'sys', 'sys');
COMMIT;
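-- 补充示意:这三条 system_config 分别对应 Flink 安装目录、Flink REST 地址和 web 平台安装目录,
-- 如果实际环境路径不同,初始化后直接按 key 更新即可(值换成自己的路径)
UPDATE system_config SET val = '/data/projects/flink/' WHERE `key` = 'flink_home';
UPDATE system_config SET val = 'http://127.0.0.1:8081/' WHERE `key` = 'flink_rest_http_address';
UPDATE system_config SET val = '/data/projects/flink-streaming-platform-web/' WHERE `key` = 'flink_streaming_platform_web_home';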
-- ----------------------------
-- Table structure for user
-- ----------------------------
DROP TABLE IF EXISTS `user`;
CREATE TABLE `user` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`username` varchar(100) COLLATE utf8mb4_bin NOT NULL COMMENT '用户帐号',
`password` varchar(255) COLLATE utf8mb4_bin DEFAULT NULL COMMENT '密码',
`status` tinyint(1) NOT NULL COMMENT '1:启用 0: 停用',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0' COMMENT '1:删除 0: 未删除',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) COLLATE utf8mb4_bin DEFAULT 'sys',
`editor` varchar(32) COLLATE utf8mb4_bin DEFAULT 'sys',
PRIMARY KEY (`id`),
UNIQUE KEY `index_uk` (`username`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
ALTER TABLE user ADD COLUMN `name` VARCHAR(100) NOT NULL COMMENT '用户姓名' AFTER `username`;
-- ----------------------------
-- Records of user 默认密码是 123456
-- ----------------------------
BEGIN;
INSERT INTO `user` VALUES (1, 'admin', '系统管理员', 'e10adc3949ba59abbe56e057f20f883e', 1, 0, '2020-07-10 22:15:04', '2020-07-24 22:21:35', 'sys', 'sys');
COMMIT;
CREATE TABLE `upload_file` (
`id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
`file_name` varchar(128) DEFAULT NULL COMMENT '文件名字',
`file_path` varchar(512) DEFAULT NULL COMMENT '文件路径',
`type` int(11) NOT NULL DEFAULT '1' COMMENT '1:jar',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`creator` varchar(32) DEFAULT 'sys',
`editor` varchar(32) DEFAULT 'sys',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COMMENT='上传文件管理';

64
docs/sql_demo/demo_1.md Normal file
View File

@ -0,0 +1,64 @@
## demo1 单流kafka写入mysql 参考
配置参考:
[jdbc](https://ci.apache.org/projects/flink/flink-docs-release-1.12/dev/table/connectors/jdbc.html)
[kafka](https://ci.apache.org/projects/flink/flink-docs-release-1.12/dev/table/connectors/kafka.html)
触发方式:每条数据到达都会触发一次计算
source kafka json 数据格式
topic: flink_test
msg: {"day_time": "20201009","id": 7,"amnount":20}
sink mysql 创建语句
```sql
CREATE TABLE sync_test_1 (
`day_time` varchar(64) NOT NULL,
`total_gmv` bigint(11) DEFAULT NULL,
PRIMARY KEY (`day_time`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8mb4;
```
配置语句
```sql
create table flink_test_1 (
id BIGINT,
day_time VARCHAR,
amnount BIGINT,
proctime AS PROCTIME ()
)
with (
'connector' = 'kafka',
'topic' = 'flink_test',
'properties.bootstrap.servers' = '172.25.20.76:9092',
'properties.group.id' = 'flink_gp_test1',
'scan.startup.mode' = 'earliest-offset',
'format' = 'json',
'json.fail-on-missing-field' = 'false',
'json.ignore-parse-errors' = 'true',
'properties.zookeeper.connect' = '172.25.20.76:2181/kafka'
);
CREATE TABLE sync_test_1 (
day_time string,
total_gmv bigint,
PRIMARY KEY (day_time) NOT ENFORCED
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://172.25.21.10:3306/flink_web?characterEncoding=UTF-8',
'table-name' = 'sync_test_1',
'username' = 'videoweb',
'password' = 'suntek'
);
INSERT INTO sync_test_1
SELECT day_time,SUM(amnount) AS total_gmv
FROM flink_test_1
GROUP BY day_time;
```
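任务正常运行后,可以直接到 MySQL 里查询 sink 表验证聚合结果(示意);比如只消费了上面那一条样例消息时,应能看到 day_time=20201009、total_gmv=20 这一行:

```sql
-- 在 MySQL 中查看聚合结果(表结构见上文 sync_test_1 建表语句)
SELECT day_time, total_gmv FROM sync_test_1 ORDER BY day_time;
```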

95
docs/sql_demo/demo_2.md Normal file
View File

@ -0,0 +1,95 @@
## demo2 双流kafka写入mysql 参考
source kafka json 数据格式
topic flink_test_1 {"day_time": "20201011","id": 8,"amnount":211}
topic flink_test_2 {"id": 8,"coupon_amnount":100}
注意:针对双流中的每条记录都会触发计算
sink mysql 创建语句
```sql
CREATE TABLE `sync_test_2` (
`id` bigint(11) NOT NULL AUTO_INCREMENT,
`day_time` varchar(64) DEFAULT NULL,
`total_gmv` bigint(11) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `uidx` (`day_time`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8mb4;
```
配置语句
```sql
create table flink_test_2_1 (
id BIGINT,
day_time VARCHAR,
amnount BIGINT,
proctime AS PROCTIME ()
)
with (
'connector' = 'kafka',
'topic' = 'flink_test_1',
'properties.bootstrap.servers' = '172.25.20.76:9092',
'properties.group.id' = 'flink_gp_test2-1',
'scan.startup.mode' = 'earliest-offset',
'format' = 'json',
'json.fail-on-missing-field' = 'false',
'json.ignore-parse-errors' = 'true',
'properties.zookeeper.connect' = '172.25.20.76:2181/kafka'
);
create table flink_test_2_2 (
id BIGINT,
coupon_amnount BIGINT,
proctime AS PROCTIME ()
)
with (
'connector' = 'kafka',
'topic' = 'flink_test_2',
'properties.bootstrap.servers' = '172.25.20.76:9092',
'properties.group.id' = 'flink_gp_test2-2',
'scan.startup.mode' = 'earliest-offset',
'format' = 'json',
'json.fail-on-missing-field' = 'false',
'json.ignore-parse-errors' = 'true',
'properties.zookeeper.connect' = '172.25.20.76:2181/kafka'
);
CREATE TABLE sync_test_2 (
day_time string,
total_gmv bigint,
PRIMARY KEY (day_time) NOT ENFORCED
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://172.25.21.10:3306/flink_web?characterEncoding=UTF-8',
'table-name' = 'sync_test_2',
'username' = 'videoweb',
'password' = 'suntek'
);
INSERT INTO sync_test_2
SELECT
day_time,
SUM(amnount - coupon_amnount) AS total_gmv
FROM
(
SELECT
a.day_time as day_time,
a.amnount as amnount,
b.coupon_amnount as coupon_amnount
FROM
flink_test_2_1 as a
LEFT JOIN flink_test_2_2 b on b.id = a.id
)
GROUP BY
day_time;
```

114
docs/sql_demo/demo_3.md Normal file
View File

@ -0,0 +1,114 @@
## demo3 kafka和mysql维表实时关联写入mysql 参考
source kafka json 数据格式
topic flink_test_1 {"day_time": "20201011","id": 8,"amnount":211}
dim test_dim
```sql
CREATE TABLE `test_dim` (
`id` bigint(11) NOT NULL,
`coupon_amnount` bigint(11) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- ----------------------------
-- Records of test_dim
-- ----------------------------
BEGIN;
INSERT INTO `test_dim` VALUES (1, 1);
INSERT INTO `test_dim` VALUES (3, 1);
INSERT INTO `test_dim` VALUES (8, 1);
COMMIT;
```
sink mysql 创建语句
```sql
CREATE TABLE `sync_test_3` (
`id` bigint(11) NOT NULL AUTO_INCREMENT,
`day_time` varchar(64) DEFAULT NULL,
`total_gmv` bigint(11) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `uidx` (`day_time`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8mb4;
```
配置语句
```sql
create table flink_test_3 (
id BIGINT,
day_time VARCHAR,
amnount BIGINT,
proctime AS PROCTIME ()
)
with (
'connector' = 'kafka',
'topic' = 'flink_test_1',
'properties.bootstrap.servers' = '172.25.20.76:9092',
'properties.group.id' = 'flink_gp_test3',
'scan.startup.mode' = 'earliest-offset',
'format' = 'json',
'json.fail-on-missing-field' = 'false',
'json.ignore-parse-errors' = 'true',
'properties.zookeeper.connect' = '172.25.20.76:2181/kafka'
);
create table flink_test_3_dim (
id BIGINT,
coupon_amnount BIGINT
)
with (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://172.25.21.10:3306/flink_web?characterEncoding=UTF-8',
'table-name' = 'test_dim',
'username' = 'videoweb',
'password' = 'suntek',
'lookup.max-retries' = '3',
 'lookup.cache.max-rows' = '1000'
);
CREATE TABLE sync_test_3 (
day_time string,
total_gmv bigint,
PRIMARY KEY (day_time) NOT ENFORCED
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://172.25.21.10:3306/flink_web?characterEncoding=UTF-8',
'table-name' = 'sync_test_3',
'username' = 'videoweb',
'password' = 'suntek'
);
INSERT INTO sync_test_3
SELECT
day_time,
SUM(amnount - coupon_amnount) AS total_gmv
FROM
(
SELECT
a.day_time as day_time,
a.amnount as amnount,
b.coupon_amnount as coupon_amnount
FROM
flink_test_3 as a
LEFT JOIN flink_test_3_dim FOR SYSTEM_TIME AS OF a.proctime as b
ON b.id = a.id
)
GROUP BY day_time;
```
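维表默认每条流数据都会回源查一次 MySQL;上面已经配置了 lookup.cache.max-rows,实际使用时通常会再配合缓存过期时间,避免缓存一直不刷新。下面是维表建表语句的一个示意写法(缓存参数为假设值,按业务能容忍的维表延迟调整):

```sql
create table flink_test_3_dim (
 id BIGINT,
 coupon_amnount BIGINT
)
with (
 'connector' = 'jdbc',
 'url' = 'jdbc:mysql://172.25.21.10:3306/flink_web?characterEncoding=UTF-8',
 'table-name' = 'test_dim',
 'username' = 'videoweb',
 'password' = 'suntek',
 'lookup.max-retries' = '3',
 'lookup.cache.max-rows' = '1000',
 -- 缓存过期时间,过期后重新回源查询
 'lookup.cache.ttl' = '10min'
);
```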

87
docs/sql_demo/demo_4.md Normal file
View File

@ -0,0 +1,87 @@
## demo4 滚动窗口
source kafka json 数据格式
topic flink_test_4
{"username":"zhp","click_url":"https://www.infoq.cn/","ts":"2021-01-05 11:12:12"}
{"username":"zhp","click_url":"https://www.infoq.cn/video/BYSSg4hGR5oZmUFsL8Kb","ts":"2020-01-05 11:12:15"}
{"username":"zhp","click_url":"https://www.infoq.cn/talks","ts":"2020-01-05 11:12:18"}
{"username":"zhp","click_url":"https://www.infoq.cn/","ts":"2021-01-05 11:12:55"}
{"username":"zhp","click_url":"https://www.infoq.cn/","ts":"2021-01-05 11:13:25"}
{"username":"zhp","click_url":"https://www.infoq.cn/talks","ts":"2021-01-05 11:13:25"}
{"username":"zhp","click_url":"https://www.infoq.cn/talks","ts":"2021-01-05 11:13:26"}
sink mysql 创建语句
```sql
CREATE TABLE `sync_test_tumble_output` (
`id` bigint(11) NOT NULL AUTO_INCREMENT,
`window_start` datetime DEFAULT NULL,
`window_end` datetime DEFAULT NULL,
`username` varchar(255) DEFAULT NULL,
`clicks` bigint(255) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8mb4;
```
配置语句
```sql
-- 开启 mini-batch,指定是否启用小批量优化(相关配置说明:https://ci.apache.org/projects/flink/flink-docs-release-1.12/zh/dev/table/config.html)
SET table.exec.mini-batch.enabled=true;
-- mini-batch 的时间间隔,即作业需要额外忍受的延迟
SET table.exec.mini-batch.allow-latency=60s;
-- 一个 mini-batch 中允许最多缓存的数据
SET table.exec.mini-batch.size=5;
create table user_clicks (
username varchar,
click_url varchar,
ts timestamp,
-- ts BIGINT,
-- ts2 AS TO_TIMESTAMP(FROM_UNIXTIME(ts / 1000, 'yyyy-MM-dd HH:mm:ss')),
WATERMARK FOR ts AS ts - INTERVAL '20' SECOND
)
with (
'connector' = 'kafka',
'topic' = 'flink_test_4',
'properties.bootstrap.servers' = '172.25.20.76:9092',
'properties.group.id' = 'flink_gp_test4',
'scan.startup.mode' = 'earliest-offset',
'format' = 'json',
'json.fail-on-missing-field' = 'false',
'json.ignore-parse-errors' = 'true',
'properties.zookeeper.connect' = '172.25.20.76:2181/kafka'
);
CREATE TABLE sync_test_tumble_output (
window_start TIMESTAMP(3),
window_end TIMESTAMP(3),
username VARCHAR,
clicks BIGINT
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://172.25.21.10:3306/flink_web?characterEncoding=UTF-8',
'table-name' = 'sync_test_tumble_output',
'username' = 'videoweb',
'password' = 'suntek'
);
INSERT INTO sync_test_tumble_output
SELECT
TUMBLE_START(ts, INTERVAL '60' SECOND) as window_start,
TUMBLE_END(ts, INTERVAL '60' SECOND) as window_end,
username,
COUNT(click_url)
FROM user_clicks
GROUP BY TUMBLE(ts, INTERVAL '60' SECOND), username;
```
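如果 kafka 消息里的 ts 是毫秒时间戳(BIGINT)而不是字符串,可以按上面注释掉的两行,用计算列把它转成 TIMESTAMP 再定义 watermark;下面是一个完整示意(表名、group.id 为假设值,后面的 TUMBLE 聚合把 ts 换成 ts2 即可):

```sql
create table user_clicks_ms (
 username varchar,
 click_url varchar,
 -- 毫秒时间戳
 ts BIGINT,
 -- 转成 TIMESTAMP 作为事件时间
 ts2 AS TO_TIMESTAMP(FROM_UNIXTIME(ts / 1000, 'yyyy-MM-dd HH:mm:ss')),
 WATERMARK FOR ts2 AS ts2 - INTERVAL '20' SECOND
)
with (
 'connector' = 'kafka',
 'topic' = 'flink_test_4',
 'properties.bootstrap.servers' = '172.25.20.76:9092',
 'properties.group.id' = 'flink_gp_test4_ms',
 'scan.startup.mode' = 'earliest-offset',
 'format' = 'json',
 'json.fail-on-missing-field' = 'false',
 'json.ignore-parse-errors' = 'true',
 'properties.zookeeper.connect' = '172.25.20.76:2181/kafka'
);
```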

89
docs/sql_demo/demo_5.md Normal file
View File

@ -0,0 +1,89 @@
## demo5 滑动窗口
source kafka json 数据格式
topic flink_test_5
{"username":"zhp","click_url":"https://www.infoq.cn/","ts":"2020-01-05 11:12:12"}
{"username":"zhp","click_url":"https://www.infoq.cn/video/BYSSg4hGR5oZmUFsL8Kb","ts":"2020-01-05 11:12:15"}
{"username":"zhp","click_url":"https://www.infoq.cn/talks","ts":"2020-01-05 11:12:18"}
{"username":"zhp","click_url":"https://www.infoq.cn/","ts":"2020-01-05 11:12:55"}
{"username":"zhp","click_url":"https://www.infoq.cn/","ts":"2020-01-05 11:13:25"}
{"username":"zhp","click_url":"https://www.infoq.cn/talks","ts":"2020-01-05 11:13:25"}
{"username":"zhp","click_url":"https://www.infoq.cn/talks","ts":"2020-01-05 11:13:26"}
sink mysql 创建语句
```sql
CREATE TABLE `sync_test_hop_output` (
`id` bigint(11) NOT NULL AUTO_INCREMENT,
`window_start` datetime DEFAULT NULL,
`window_end` datetime DEFAULT NULL,
`username` varchar(255) DEFAULT NULL,
`clicks` bigint(255) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
```
配置语句
```sql
-- 开启 mini-batch(相关配置说明:https://ci.apache.org/projects/flink/flink-docs-release-1.10/zh/dev/table/config.html)
SET table.exec.mini-batch.enabled=true;
-- mini-batch 的时间间隔,即作业需要额外忍受的延迟
SET table.exec.mini-batch.allow-latency=60s;
-- 一个 mini-batch 中允许最多缓存的数据
SET table.exec.mini-batch.size=5;
create table user_clicks (
username varchar,
click_url varchar,
ts timestamp,
-- ts BIGINT,
-- ts2 AS TO_TIMESTAMP(FROM_UNIXTIME(ts / 1000, 'yyyy-MM-dd HH:mm:ss')),
WATERMARK FOR ts AS ts - INTERVAL '5' SECOND
)
with (
'connector' = 'kafka',
'topic' = 'flink_test_5',
'properties.bootstrap.servers' = '172.25.20.76:9092',
'properties.group.id' = 'flink_gp_test5',
'scan.startup.mode' = 'earliest-offset',
'format' = 'json',
'json.fail-on-missing-field' = 'false',
'json.ignore-parse-errors' = 'true',
'properties.zookeeper.connect' = '172.25.20.76:2181/kafka'
);
CREATE TABLE sync_test_hop_output (
window_start TIMESTAMP(3),
window_end TIMESTAMP(3),
username VARCHAR,
clicks BIGINT
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://172.25.21.10:3306/flink_web?characterEncoding=UTF-8',
'table-name' = 'sync_test_hop_output',
'username' = 'videoweb',
'password' = 'suntek'
);
--统计每个用户过去1分钟的单击次数每30秒更新1次即1分钟的窗口30秒滑动1次
INSERT INTO sync_test_hop_output
SELECT
HOP_START (ts, INTERVAL '30' SECOND, INTERVAL '1' MINUTE) as window_start,
HOP_END (ts, INTERVAL '30' SECOND, INTERVAL '1' MINUTE) as window_end,
username,
COUNT(click_url)
FROM user_clicks
GROUP BY HOP (ts, INTERVAL '30' SECOND, INTERVAL '1' MINUTE), username;
```

266
docs/sql_demo/demo_6.md Normal file
View File

@ -0,0 +1,266 @@
# 参考
https://github.com/ververica/flink-cdc-connectors/wiki/%E4%B8%AD%E6%96%87%E6%95%99%E7%A8%8B
[基于 Flink SQL CDC的实时数据同步方案](http://www.dreamwu.com/post-1594.html)
# 1. mysql配置
## 1.1. 主从同步配置、数据准备
1、修改配置
[mysqld]
# 前面还有其他配置
# 添加的部分
server-id = 12345
log-bin = mysql-bin
# 必须为ROW
binlog_format = ROW
# 必须为FULLMySQL-5.7后才有该参数
binlog_row_image = FULL
expire_logs_days = 15
2、验证
SHOW VARIABLES LIKE '%binlog%';
3、设置权限
-- 设置拥有同步权限的用户
CREATE USER 'flinkuser' IDENTIFIED BY 'flinkpassword';
-- 赋予同步相关权限
GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'flinkuser';
创建用户并赋予权限成功后使用该用户登录MySQL可以使用以下命令查看主从同步相关信息
SHOW MASTER STATUS
SHOW SLAVE STATUS
SHOW BINARY LOGS
# 2. CDC Streaming ETL
模拟电商公司的订单表和物流表:需要对订单数据进行统计分析,把不同来源的信息关联起来,形成订单大宽表后交给下游业务方,用 ES 做数据分析。
这个案例演示了如何只依赖 Flink、不依赖其他组件,借助 Flink 强大的计算能力,实时把 Binlog 的数据流做关联并同步至 ES。
## 2.1 实时根据binlog将商品表、订单表、物流表合成宽表写到ES中
## 2.1.1. 数据库建表SQL
```sql
CREATE TABLE products (
id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(255) NOT NULL,
description VARCHAR(512)
);
ALTER TABLE products AUTO_INCREMENT = 101;
INSERT INTO products
VALUES (default,"scooter","Small 2-wheel scooter"),
(default,"car battery","12V car battery"),
(default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3"),
(default,"hammer","12oz carpenter's hammer"),
(default,"hammer","14oz carpenter's hammer"),
(default,"hammer","16oz carpenter's hammer"),
(default,"rocks","box of assorted rocks"),
(default,"jacket","water resistent black wind breaker"),
(default,"spare tire","24 inch spare tire");
CREATE TABLE orders (
order_id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
order_date DATETIME NOT NULL,
customer_name VARCHAR(255) NOT NULL,
price DECIMAL(10, 5) NOT NULL,
product_id INTEGER NOT NULL,
order_status BOOLEAN NOT NULL -- 是否下单
) AUTO_INCREMENT = 10001;
INSERT INTO orders
VALUES (default, '2020-07-30 10:08:22', 'Jark', 50.50, 102, false),
(default, '2020-07-30 10:11:09', 'Sally', 15.00, 105, false),
(default, '2020-07-30 12:00:30', 'Edward', 25.25, 106, false);
CREATE TABLE shipments (
shipment_id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
order_id INTEGER NOT NULL,
origin VARCHAR(255) NOT NULL,
destination VARCHAR(255) NOT NULL,
is_arrived BOOLEAN NOT NULL
) AUTO_INCREMENT = 1001;
INSERT INTO shipments
VALUES (default,10001,'Beijing','Shanghai',false),
(default,10002,'Hangzhou','Shanghai',false),
(default,10003,'Shanghai','Hangzhou',false);
```
## 2.1.2. flink配置
```sql
CREATE TABLE products (
id INT,
name STRING,
description STRING
) WITH (
'connector' = 'mysql-cdc',
'hostname' = '172.25.21.29',
'port' = '3306',
'username' = 'flinkuser',
'password' = 'flinkpassword',
'database-name' = 'db_inventory_cdc',
'table-name' = 'products'
);
CREATE TABLE orders (
order_id INT,
order_date TIMESTAMP(0),
customer_name STRING,
price DECIMAL(10, 5),
product_id INT,
order_status BOOLEAN
) WITH (
'connector' = 'mysql-cdc',
'hostname' = '172.25.21.29',
'port' = '3306',
'username' = 'flinkuser',
'password' = 'flinkpassword',
'database-name' = 'db_inventory_cdc',
'table-name' = 'orders'
);
CREATE TABLE shipments (
shipment_id INT,
order_id INT,
origin STRING,
destination STRING,
is_arrived BOOLEAN
) WITH (
'connector' = 'mysql-cdc',
'hostname' = '172.25.21.29',
'port' = '3306',
'username' = 'flinkuser',
'password' = 'flinkpassword',
'database-name' = 'db_inventory_cdc',
'table-name' = 'shipments'
);
CREATE TABLE enriched_orders (
order_id INT,
order_date TIMESTAMP(0),
customer_name STRING,
price DECIMAL(10, 5),
product_id INT,
order_status BOOLEAN,
product_name STRING,
product_description STRING,
shipment_id INT,
origin STRING,
destination STRING,
is_arrived BOOLEAN,
PRIMARY KEY (order_id) NOT ENFORCED
) WITH (
'connector' = 'elasticsearch-7',
'hosts' = 'http://172.25.23.15:9401',
'index' = 'enriched_orders'
);
INSERT INTO enriched_orders
SELECT o.*, p.name, p.description, s.shipment_id, s.origin, s.destination, s.is_arrived
FROM orders AS o
LEFT JOIN products AS p ON o.product_id = p.id
LEFT JOIN shipments AS s ON o.order_id = s.order_id;
```
## 2.1.3. 验证SQL
```sql
--增加记录
INSERT INTO orders VALUES (default, '2020-07-30 15:22:00', 'Jark', 29.71, 104, false);
INSERT INTO shipments VALUES (default,10004,'Shanghai','Beijing',false);
--更新记录
UPDATE orders SET order_status = true WHERE order_id = 10004;
UPDATE shipments SET is_arrived = true WHERE shipment_id = 4;
--删除记录
DELETE FROM orders WHERE order_id = 10004;
```
## 2.1.4. 手工启动脚本
注意:相应的jar包需要到https://maven.aliyun.com/mvn/search下载特别是cdc相关的jar
```shell script
/data1/flink/flink/bin/flink run -d -p 2 \
-C file:///data1/flink/flink-streaming-platform-web/jars/flink-connector-jdbc_2.11-1.12.0.jar \
-C file:///data1/flink/flink-streaming-platform-web/jars/flink-sql-connector-kafka_2.11-1.12.0.jar \
-C file:///data1/flink/flink-streaming-platform-web/jars/flink-sql-connector-elasticsearch7_2.11-1.12.0.jar \
-c com.flink.streaming.core.JobApplication /data1/flink/flink-streaming-platform-web/lib/flink-streaming-core_flink_1.12.0-1.2.0.RELEASE.jar \
-sql /data1/flink/flink-streaming-platform-web/sql/job_sql_8.sql
```
## 2.2. 按天统计销售额
### 2.2.1. flink 配置
```sql
set table.exec.source.cdc-events-duplicate = true;
CREATE TABLE orders (
order_id INT,
order_date TIMESTAMP(0),
customer_name STRING,
price DECIMAL(10, 5),
product_id INT,
 order_status BOOLEAN,
 PRIMARY KEY (order_id) NOT ENFORCED
) WITH (
'connector' = 'mysql-cdc',
'hostname' = '172.25.21.29',
'port' = '3306',
'username' = 'flinkuser',
'password' = 'flinkpassword',
'database-name' = 'db_inventory_cdc',
'table-name' = 'orders'
);
CREATE TABLE kafka_gmv (
day_str STRING,
gmv DECIMAL(10, 5),
PRIMARY KEY (day_str) NOT ENFORCED
) WITH (
'connector' = 'kafka',
'topic' = 'flink_test_6',
'scan.startup.mode' = 'earliest-offset',
'properties.group.id' = 'flink_gp_test6',
'properties.bootstrap.servers' = '172.25.20.76:9092',
'format' = 'debezium-json',
'debezium-json.ignore-parse-errors' = 'true',
'debezium-json.timestamp-format.standard' = 'SQL',
'debezium-json.map-null-key.mode' = 'DROP'
);
INSERT INTO kafka_gmv
SELECT DATE_FORMAT(order_date, 'yyyy-MM-dd') as day_str, SUM(price) as gmv
FROM orders
WHERE order_status = true
GROUP BY DATE_FORMAT(order_date, 'yyyy-MM-dd');
-- 读取 Kafka 的 changelog 数据,观察 materialize 后的结果
CREATE TABLE print_table (
day_str STRING,
gmv DECIMAL(10, 5),
PRIMARY KEY (day_str) NOT ENFORCED
) WITH (
'connector' = 'print'
);
insert into print_table SELECT * FROM kafka_gmv;
```
## 2.2.2. 验证SQL
```sql
UPDATE orders SET order_status = true WHERE order_id = 10001;
UPDATE orders SET order_status = true WHERE order_id = 10002;
UPDATE orders SET order_status = true WHERE order_id = 10003;
INSERT INTO orders
VALUES (default, '2020-07-30 17:33:00', 'Timo', 50.00, 104, true);
UPDATE orders SET price = 40.00 WHERE order_id = 10005;
DELETE FROM orders WHERE order_id = 10005;
```
## 2.2.3. 手工启动脚本
注意:相应的jar包需要到 https://maven.aliyun.com/mvn/search 下载,特别是cdc相关的jar。
不能将kafka表中的format配置为changelog-json:flink 1.12.0 中 JsonRowDataSerializationSchema 的构造方法
JsonRowDataSerializationSchema(RowType rowType, TimestampFormat timestampFormat) 已被修改,否则会报
java.lang.NoSuchMethodError: org.apache.flink.formats.json.JsonRowDataSerializationSchema.<init>(Lorg/apache/flink/table/types/logical/RowType;Lorg/apache/flink/formats/json/TimestampFormat;)V
(方法不存在)。
```shell script
/data1/flink/flink/bin/flink run -d -p 2 \
-C file:///data1/flink/flink-streaming-platform-web/jars/flink-connector-jdbc_2.11-1.12.0.jar \
-C file:///data1/flink/flink-streaming-platform-web/jars/flink-sql-connector-kafka_2.11-1.12.0.jar \
-c com.flink.streaming.core.JobApplication /data1/flink/flink-streaming-platform-web/lib/flink-streaming-core_flink_1.12.0-1.2.0.RELEASE.jar \
-sql /data1/flink/flink-streaming-platform-web/sql/job_sql_8.sql
```

View File

@ -0,0 +1,21 @@
#### 提前在 hive 的 test 库下创建好 test 表
~~~~
create table test(
id int,
name string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
~~~~
~~~~sql
CREATE CATALOG testmyhive WITH (
'type' = 'hive',
'default-database' = 'test',
'hive-conf-dir' = '/alidata/server/zhp/catalog/config'
);
USE CATALOG testmyhive;
insert into test.test values(4,'n2');
~~~~
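写入完成后,可以在同一个 catalog 下直接查询做验证(示意):
~~~~sql
USE CATALOG testmyhive;
SELECT * FROM test.test;
~~~~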

View File

@ -0,0 +1,55 @@
# 1. datagen简介
在flink 1.11中内置提供了一个DataGen 连接器,主要是用于生成一些随机数,用于在没有数据源的时候,进行流任务的测试以及性能测试等。
- DataGen 连接器允许按数据生成规则进行读取。
- DataGen 连接器可以使用计算列语法。 这使您可以灵活地生成记录。
- DataGen 连接器是内置的。
[具体的使用方法可以先看下官网的概述](https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/dev/table/connectors/datagen.html)
使用时注意如下:
- 目前随机生成只支持基本数据类型:数字类型(TINYINT、SMALLINT、INT、BIGINT、FLOAT、DOUBLE)、字符串类型(VARCHAR、CHAR)
以及 BOOLEAN 类型,不支持复杂类型:ARRAY、MAP、ROW,请用计算列构造这些类型。
- 目前有两种数据生成器,一种是随机生成器(默认),这个是无界的,另一个是序列生成器,是有界的。
- 字段中只要有一个是按序列生成的,也就是有界的,程序就会在序列结束的时候退出。如果所有字段都是随机生成的,则程序最终不会结束。
- 计算列是一个使用 “column_name AS computed_column_expression” 语法生成的虚拟列。
它由同一表中其他列的非查询表达式生成,并且不会在表中进行物理存储。例如,一个计算列可以使用 cost AS price * quantity 进行定义,
这个表达式可以包含物理列、常量、函数或变量的任意组合,但不能包含任何子查询。
- 在 Flink 中计算列一般用于为 CREATE TABLE 语句定义时间属性。处理时间属性可以简单地通过使用了系统函数 PROCTIME() 的 proc AS PROCTIME() 语句进行定义(见下面的示意 SQL)。
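下面给一个计算列的最小示意(表名、字段都是假设的,仅用于说明语法;cost 由同表的 price、quantity 计算得到,proc 为处理时间属性):

```sql
CREATE TABLE orders_demo (
 price DOUBLE,
 quantity INT,
 -- 计算列:不做物理存储,由同表其他列的表达式求值
 cost AS price * quantity,
 -- 处理时间属性
 proc AS PROCTIME()
) WITH (
 'connector' = 'datagen',
 'rows-per-second' = '1'
);
```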
# 2. 使用示例
## 2.1. 在flink sql-client中使用
### 进入客户端
```shell script
bin/sql-client.sh embedded
bin/sql-client.sh embedded -l 依赖的jar包路径
```
### flink SQL测试
```sql
CREATE TABLE datagen (
f_sequence INT,
f_random INT,
f_random_str STRING,
ts AS localtimestamp,
WATERMARK FOR ts AS ts
) WITH (
'connector' = 'datagen',
-- optional options --
'rows-per-second'='5',
'fields.f_sequence.kind'='sequence',
'fields.f_sequence.start'='1',
'fields.f_sequence.end'='1000',
'fields.f_random.min'='1',
'fields.f_random.max'='1000',
'fields.f_random_str.length'='10'
);
select * from datagen;
```
## 2.2 参数解释
DDL的with属性中除了connector是必填之外其他都是可选的。
rows-per-second 每秒生成的数据条数
f_sequence字段的生成策略是按序列生成并且指定了起始值所以该程序将会在到达序列的结束值之后退出
f_random 字段是按照随机生成,并指定随机生成的范围
f_random_str是一个字符串类型属性中指定了随机生成字符串的长度是10
ts列是一个计算列返回当前的时间.
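配合 print 连接器,可以拼出一个不依赖任何外部组件的冒烟测试任务(与上文 flink_web.sql 里 job_config 自带的 test_datagen_simple 示例一致):

```sql
CREATE TABLE source_table (
 f0 INT,
 f1 INT,
 f2 STRING
) WITH (
 'connector' = 'datagen',
 'rows-per-second' = '5'
);

CREATE TABLE print_table (
 f0 INT,
 f1 INT,
 f2 STRING
) WITH (
 'connector' = 'print'
);

INSERT INTO print_table SELECT f0, f1, f2 FROM source_table;
```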

View File

@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
name: bigdata-sync

View File

@ -0,0 +1,26 @@
---
# loki
apiVersion: v1
kind: ConfigMap
metadata:
name: loki-promtail
namespace: bigdata-sync
data:
config.yaml: |
server:
disable: true
positions:
filename: /mnt/promtail-positions.yaml
clients:
- url: http://loki.logging-loki/loki/api/v1/push
scrape_configs:
- job_name: bigdata-sync
static_configs:
- targets:
- localhost
labels:
job: bigdata-sync
__path__: /mnt/**/*.log
environment: ${ENVIRONMENT_NAME}
replicaset: ${REPLICASET_NAME}
pod: ${POD_NAME}

View File

@ -0,0 +1,65 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: flink-config
namespace: bigdata-sync
labels:
app: flink
data:
flink-conf.yaml: |+
jobmanager.rpc.address: flink-jobmanager
taskmanager.numberOfTaskSlots: 2
blob.server.port: 6124
jobmanager.rpc.port: 6123
taskmanager.rpc.port: 6122
queryable-state.proxy.ports: 6125
jobmanager.memory.process.size: 1600m
taskmanager.memory.process.size: 1728m
parallelism.default: 2
scheduler-mode: reactive
execution.checkpointing.interval: 10s
log4j-console.properties: |+
# This affects logging for both user code and Flink
rootLogger.level = INFO
rootLogger.appenderRef.console.ref = ConsoleAppender
rootLogger.appenderRef.rolling.ref = RollingFileAppender
# Uncomment this if you want to _only_ change Flink's logging
#logger.flink.name = org.apache.flink
#logger.flink.level = INFO
# The following lines keep the log level of common libraries/connectors on
# log level INFO. The root logger does not override this. You have to manually
# change the log levels here.
logger.akka.name = akka
logger.akka.level = INFO
logger.kafka.name= org.apache.kafka
logger.kafka.level = INFO
logger.hadoop.name = org.apache.hadoop
logger.hadoop.level = INFO
logger.zookeeper.name = org.apache.zookeeper
logger.zookeeper.level = INFO
# Log all infos to the console
appender.console.name = ConsoleAppender
appender.console.type = CONSOLE
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
# Log all infos in the given rolling file
appender.rolling.name = RollingFileAppender
appender.rolling.type = RollingFile
appender.rolling.append = false
appender.rolling.fileName = ${sys:log.file}
appender.rolling.filePattern = ${sys:log.file}.%i
appender.rolling.layout.type = PatternLayout
appender.rolling.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
appender.rolling.policies.type = Policies
appender.rolling.policies.size.type = SizeBasedTriggeringPolicy
appender.rolling.policies.size.size=100MB
appender.rolling.strategy.type = DefaultRolloverStrategy
appender.rolling.strategy.max = 10
# Suppress the irrelevant (wrong) warnings from the Netty channel handler
logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline
logger.netty.level = OFF

View File

@ -0,0 +1,16 @@
apiVersion: v1
kind: Service
metadata:
name: flink-jobmanager
namespace: bigdata-sync
spec:
type: ClusterIP
ports:
- name: rpc
port: 6123
- name: blob-server
port: 6124
- name: webui
port: 8081
selector:
app: flink-jobmanager

View File

@ -0,0 +1,41 @@
---
# 存储
apiVersion: v1
kind: PersistentVolume
metadata:
name: bigdata-sync-flink
namespace: bigdata-sync
spec:
capacity:
storage: 100Gi
volumeMode: Filesystem
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
storageClassName: local-storage
local:
path: /k8slpv/bigdata-sync/flink
nodeAffinity:
required:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- 192.168.90.11
---
# 存储声明
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: flink
namespace: bigdata-sync
spec:
storageClassName: local-storage
volumeName: bigdata-sync-flink
volumeMode: Filesystem
accessModes:
- ReadWriteMany
resources:
requests:
storage: 100Gi

View File

@ -0,0 +1,35 @@
---
# 从镜像中读取配置文件写入到挂载目录中以完成首次初始化配置文件
apiVersion: batch/v1
kind: Job
metadata:
name: init-flink
namespace: bigdata-sync
spec:
template:
spec:
containers:
- name: copy-lib
image: flink:1.12.0-scala_2.11-java8
imagePullPolicy: IfNotPresent
# command: [ "sh", "-c", "sleep 99999"]
command: [ "sh", "-c", "cp -r /opt/flink/lib/* /mnt/"]
volumeMounts:
- mountPath: /mnt
name: flink
subPath: lib
- name: download-lib
image: flink:1.12.0-scala_2.11-java8
imagePullPolicy: IfNotPresent
# command: [ "sh", "-c", "sleep 99999"]
command: [ "sh", "-c", "cd /mnt && wget https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-mysql-cdc/2.0.1/flink-sql-connector-mysql-cdc-2.0.1.jar"]
volumeMounts:
- mountPath: /mnt
name: flink
subPath: lib
restartPolicy: Never
volumes:
- name: flink
persistentVolumeClaim:
claimName: flink
backoffLimit: 1

View File

@ -0,0 +1,95 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: flink-jobmanager
namespace: bigdata-sync
spec:
replicas: 1 # Set the value to greater than 1 to start standby JobManagers
selector:
matchLabels:
app: flink-jobmanager
template:
metadata:
labels:
app: flink-jobmanager
spec:
containers:
- name: jobmanager
image: flink:1.12.0-scala_2.11-java8
imagePullPolicy: IfNotPresent
# env:
# - name: POD_IP
# valueFrom:
# fieldRef:
# apiVersion: v1
# fieldPath: status.podIP
# The following args overwrite the value of jobmanager.rpc.address configured in the configuration config map to POD_IP.
args: ["jobmanager"]
# 不能通过 pod ip 注册,这样会导致 task manager 无法连接注册上来
# args: ["jobmanager", "$(POD_IP)"]
ports:
- containerPort: 6123
name: rpc
- containerPort: 6124
name: blob-server
- containerPort: 8081
name: webui
livenessProbe:
tcpSocket:
port: 6123
initialDelaySeconds: 30
periodSeconds: 60
volumeMounts:
- name: flink-config-volume
mountPath: /opt/flink/conf
- mountPath: /opt/flink/lib
name: flink
subPath: lib
- name: logs-dir
mountPath: /opt/flink/log
securityContext:
runAsUser: 9999 # refers to user _flink_ from official flink image, change if necessary
# serviceAccountName: flink-service-account # Service account which has the permissions to create, edit, delete ConfigMaps
- name: promtail
image: grafana/promtail:2.2.1
imagePullPolicy: IfNotPresent
args:
- -config.file=/etc/promtail/config.yaml
- -config.expand-env=true
env:
- name: TZ
value: Asia/Shanghai
- name: ENVIRONMENT_NAME
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: REPLICASET_NAME
valueFrom:
fieldRef:
fieldPath: metadata.labels['app']
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
volumeMounts:
- mountPath: /mnt
name: logs-dir
- mountPath: /etc/promtail
name: promtail-config
volumes:
- name: flink-config-volume
configMap:
name: flink-config
items:
- key: flink-conf.yaml
path: flink-conf.yaml
- key: log4j-console.properties
path: log4j-console.properties
- name: flink
persistentVolumeClaim:
claimName: flink
- name: promtail-config
configMap:
name: loki-promtail
- name: logs-dir
emptyDir: {}

View File

@ -0,0 +1,98 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: flink-taskmanager
namespace: bigdata-sync
spec:
replicas: 2
selector:
matchLabels:
app: flink-taskmanager
template:
metadata:
labels:
app: flink-taskmanager
spec:
initContainers:
- name: wait-jobmanager
image: busybox:latest
imagePullPolicy: IfNotPresent
command:
- sh
- -c
- |
set -ex
until nc -zv flink-jobmanager 8081; do sleep 5; done
sleep 10
containers:
- name: taskmanager
image: flink:1.12.0-scala_2.11-java8
imagePullPolicy: IfNotPresent
args: ["taskmanager"]
# command:
# - sh
# - -c
# - "sleep 9999"
ports:
- containerPort: 6122
name: rpc
- containerPort: 6125
name: query-state
livenessProbe:
tcpSocket:
port: 6122
initialDelaySeconds: 30
periodSeconds: 60
volumeMounts:
- name: flink-config-volume
mountPath: /opt/flink/conf/
- mountPath: /opt/flink/lib
name: flink
subPath: lib
- name: logs-dir
mountPath: /opt/flink/log
# securityContext:
# runAsUser: 9999 # refers to user _flink_ from official flink image, change if necessary
- name: promtail
image: grafana/promtail:2.2.1
imagePullPolicy: IfNotPresent
args:
- -config.file=/etc/promtail/config.yaml
- -config.expand-env=true
env:
- name: TZ
value: Asia/Shanghai
- name: ENVIRONMENT_NAME
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: REPLICASET_NAME
valueFrom:
fieldRef:
fieldPath: metadata.labels['app']
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
volumeMounts:
- mountPath: /mnt
name: logs-dir
- mountPath: /etc/promtail
name: promtail-config
volumes:
- name: flink-config-volume
configMap:
name: flink-config
items:
- key: flink-conf.yaml
path: flink-conf.yaml
- key: log4j-console.properties
path: log4j-console.properties
- name: flink
persistentVolumeClaim:
claimName: flink
- name: promtail-config
configMap:
name: loki-promtail
- name: logs-dir
emptyDir: {}

View File

@ -0,0 +1,23 @@
---
# 外部域名
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: flink-jobmanager
namespace: bigdata-sync
annotations:
nginx.ingress.kubernetes.io/proxy-body-size: "0"
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
spec:
rules:
- host: flink-bigdata-sync.local.wangjiahuan.com
http:
paths:
- pathType: Prefix
path: "/"
backend:
service:
name: flink-jobmanager
port:
name: webui

View File

@ -0,0 +1,264 @@
---
# 存储
apiVersion: v1
kind: PersistentVolume
metadata:
name: bigdata-sync-flink-streaming-platform-web-mysql
namespace: bigdata-sync
spec:
capacity:
storage: 100Gi
volumeMode: Filesystem
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
storageClassName: local-storage
local:
path: /k8slpv/bigdata-sync/flink-streaming-platform-web
nodeAffinity:
required:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- 192.168.90.11
---
# 存储声明
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: flink-streaming-platform-web-mysql
namespace: bigdata-sync
spec:
storageClassName: local-storage
volumeName: bigdata-sync-flink-streaming-platform-web-mysql
volumeMode: Filesystem
accessModes:
- ReadWriteMany
resources:
requests:
storage: 100Gi
---
# 外部域名
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: flink-streaming-platform-web
namespace: bigdata-sync
annotations:
nginx.ingress.kubernetes.io/proxy-body-size: "0"
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
spec:
rules:
- host: web-bigdata-sync.local.wangjiahuan.com
http:
paths:
- pathType: Prefix
path: "/"
backend:
service:
name: flink-streaming-platform-web
port:
number: 8080
---
# 内部域名
apiVersion: v1
kind: Service
metadata:
name: flink-streaming-platform-web
namespace: bigdata-sync
spec:
type: LoadBalancer
ports:
- name: for-8080
port: 8080
targetPort: 8080
- name: for-3306
port: 3306
targetPort: 3306
selector:
app: flink-streaming-platform-web
#---
## 配置文件
#apiVersion: v1
#kind: ConfigMap
#metadata:
# name: flink-streaming-platform-web
# namespace: bigdata-sync
#data:
# application.properties: |
# ####jdbc信息
# server.port=8080
# spring.datasource.url=jdbc:mysql://127.0.0.1:3306/flink_web?characterEncoding=UTF-8&useSSL=false
# spring.datasource.username=flink_web_test
# spring.datasource.password=flink_web_test_123
---
# 服务
apiVersion: apps/v1
kind: Deployment
metadata:
name: flink-streaming-platform-web
namespace: bigdata-sync
labels:
app: flink-streaming-platform-web
spec:
replicas: 1
selector:
matchLabels:
app: flink-streaming-platform-web
template:
metadata:
labels:
app: flink-streaming-platform-web
spec:
restartPolicy: Always
initContainers:
- name: wait-jobmanager
image: busybox:latest
imagePullPolicy: IfNotPresent
command:
- sh
- -c
- |
set -ex
until nc -zv flink-jobmanager 8081; do sleep 5; done
sleep 10
containers:
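      # The MySQL sidecar below shares the pod's network namespace, which is why the
      # web container's spring.datasource.url points at 127.0.0.1:3306.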
- name: mysql
image: mysql:8.0.25
imagePullPolicy: IfNotPresent
args:
- --character-set-server=utf8mb4
- --collation-server=utf8mb4_unicode_ci
ports:
- containerPort: 3306
# livenessProbe:
# tcpSocket:
# port: 3306
# failureThreshold: 10
# periodSeconds: 60
# initialDelaySeconds: 10
env:
- name: TZ
value: Asia/Shanghai
- name: MYSQL_ROOT_PASSWORD
value: UWWDEEH8BZ0gUAX
- name: MYSQL_DATABASE
value: flink_web
- name: MYSQL_USER
value: flink_web_test
- name: MYSQL_PASSWORD
value: flink_web_test_123
volumeMounts:
- mountPath: /var/lib/mysql
name: flink-streaming-platform-web-mysql
resources:
requests:
cpu: 100m
memory: 100Mi
ephemeral-storage: 1Gi
limits:
cpu: 5000m
memory: 5000Mi
ephemeral-storage: 10Gi
- name: flink-streaming-platform-web
image: tanshilindocker/flink-streaming-platform-web:0.0.5
imagePullPolicy: IfNotPresent
ports:
- containerPort: 8080
# command:
# - sh
# - -c
# - "sleep 99999"
# livenessProbe:
# tcpSocket:
# port: 8000
# failureThreshold: 10
# periodSeconds: 60
# initialDelaySeconds: 10
args:
- -Xmx1888M -Xms1888M -Xmn1536M -XX:MaxMetaspaceSize=512M -XX:MetaspaceSize=512M -XX:+UseConcMarkSweepGC -Xdebug -Xrunjdwp:transport=dt_socket,address=9901,server=y,suspend=n -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -Dcom.sun.management.jmxremote.port=8999 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -XX:+ExplicitGCInvokesConcurrentAndUnloadsClasses -XX:+CMSClassUnloadingEnabled -XX:+ParallelRefProcEnabled -XX:+CMSScavengeBeforeRemark -XX:ErrorFile=../logs/hs_err_pid%p.log -XX:HeapDumpPath=../logs -XX:+HeapDumpOnOutOfMemoryError
- --server.port=8080
- --spring.datasource.url=jdbc:mysql://127.0.0.1:3306/flink_web?characterEncoding=UTF-8&useSSL=false&allowPublicKeyRetrieval=true
- --spring.datasource.username=flink_web_test
- --spring.datasource.password=flink_web_test_123
- --logging.config=classpath:logging/logback-prod.xml
- --spring.devtools.livereload.enabled=false
env:
- name: TZ
value: Asia/Shanghai
resources:
requests:
cpu: 100m
memory: 100Mi
ephemeral-storage: 1Gi
limits:
cpu: 5000m
memory: 5000Mi
ephemeral-storage: 10Gi
volumeMounts:
- name: flink-config-volume
mountPath: /opt/flink/conf
- mountPath: /app/flink-streaming-platform-web/logs
name: logs-dir
# - mountPath: /app/flink-streaming-platform-web/conf
# name: flink-streaming-platform-web
- name: promtail
image: grafana/promtail:2.2.1
imagePullPolicy: IfNotPresent
args:
- -config.file=/etc/promtail/config.yaml
- -config.expand-env=true
env:
- name: TZ
value: Asia/Shanghai
- name: ENVIRONMENT_NAME
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: REPLICASET_NAME
valueFrom:
fieldRef:
fieldPath: metadata.labels['app']
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
volumeMounts:
- mountPath: /mnt
name: logs-dir
- mountPath: /etc/promtail
name: promtail-config
resources:
requests:
cpu: 50m
memory: 0.05Gi
ephemeral-storage: 1Gi
limits:
cpu: 500m
memory: 0.5Gi
ephemeral-storage: 10Gi
volumes:
- name: flink-streaming-platform-web-mysql
persistentVolumeClaim:
claimName: flink-streaming-platform-web-mysql
# - name: flink-streaming-platform-web
# configMap:
# name: flink-streaming-platform-web
- name: flink-config-volume
configMap:
name: flink-config
items:
- key: flink-conf.yaml
path: flink-conf.yaml
- key: log4j-console.properties
path: log4j-console.properties
- name: promtail-config
configMap:
name: loki-promtail
- name: logs-dir
emptyDir: {}

View File

@ -0,0 +1,7 @@
FROM flink:1.12.0-scala_2.11-java8
ADD flink-streaming-platform-web.tar.gz /app/
RUN rm -rf /app/flink-streaming-platform-web.tar.gz
WORKDIR /app/flink-streaming-platform-web/lib
VOLUME /app/flink-streaming-platform-web/conf
VOLUME /app/flink-streaming-platform-web/logs
ENTRYPOINT ["java", "-jar", "flink-streaming-web-1.5.0.RELEASE.jar"]


View File

@ -0,0 +1,41 @@
Source archive: https://github.com/zhp8341/flink-streaming-platform-web/archive/refs/heads/master.zip
Build the image with the Dockerfile:
```
yum install -y wget unzip
mkdir test2 && cd test2 && rm -rf master.zip
wget https://github.com/zhp8341/flink-streaming-platform-web/archive/refs/heads/master.zip
unzip master.zip && cd flink-streaming-platform-web-master
docker build -t tanshilindocker/flink-streaming-platform-web:0.0.5 -f Dockerfile .
docker login --username="" --password=""
docker push tanshilindocker/flink-streaming-platform-web:0.0.5
```
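The Dockerfile shown earlier ADDs flink-streaming-platform-web.tar.gz into the image, which the unzip step alone does not produce. A rough sketch of preparing that archive before the build, assuming `mvn package` leaves a `flink-streaming-platform-web` distribution directory (the directory name and Maven layout are assumptions):
```
# Sketch only: package the sources and create the archive the Dockerfile ADDs.
# The distribution directory name is an assumption.
mvn -DskipTests clean package
tar -czf flink-streaming-platform-web.tar.gz flink-streaming-platform-web/
docker build -t tanshilindocker/flink-streaming-platform-web:0.0.5 -f Dockerfile .
```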
Testing
```
docker run -it tanshilindocker/flink-streaming-platform-web:0.0.1 bash
docker run -it tanshilindocker/flink-streaming-platform-web:0.0.1
docker run -it -v $(pwd)/application.properties:/app/flink-streaming-platform-web/conf/application.properties --net=host tanshilindocker/flink-streaming-platform-web:0.0.1
java -jar lib/$project --spring.profiles.active=$env --spring.config.additional-location=conf/application.properties
```
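For a quick local check against an already running MySQL, the same Spring overrides used in the Kubernetes Deployment can be passed as container arguments (a sketch; the tag and credentials are only examples):
```
docker run -it --net=host tanshilindocker/flink-streaming-platform-web:0.0.5 \
  --server.port=8080 \
  --spring.datasource.url='jdbc:mysql://127.0.0.1:3306/flink_web?characterEncoding=UTF-8&useSSL=false&allowPublicKeyRetrieval=true' \
  --spring.datasource.username=flink_web_test \
  --spring.datasource.password=flink_web_test_123
```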

View File

@ -0,0 +1,106 @@
Initialize the data directories (not needed for now)
```
rm -rf /k8slpv/bigdata-sync/flink-streaming-platform-web /k8slpv/bigdata-sync/flink
mkdir -p /k8slpv/bigdata-sync/flink-streaming-platform-web && chmod 777 /k8slpv/bigdata-sync/flink-streaming-platform-web
mkdir -p /k8slpv/bigdata-sync/flink && chmod 777 /k8slpv/bigdata-sync/flink
```
# Deployment
If Loki is not used for log collection, it is recommended to drop the Loki parts (add them back if it is; they are currently removed by default).
Apply the files below in order (some services may not come up cleanly on the first start; that is fine, just wait for them to restart).
Note: the host values of the two Ingress resources must be changed first (see the sketch after the apply command below).
```
0.0-ns.yaml
0.1-log-config.yaml
1.0-flink-reactive-mode-configuration-configmap.yaml
1.1-jobmanager-service.yaml
1.2-jobmanager-session-deployment-ha.yaml
1.3-taskmanager-session-deployment.yaml
1.4-ingress.yaml
2.0-flink-streaming-platform-web.yaml
```
```
kubectl apply -f .
```
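As noted above, the two Ingress hosts should point at your own domains before applying; a sketch of changing them (the file names are taken from the list above, the example domains are placeholders):
```
sed -i 's/flink-bigdata-sync.local.wangjiahuan.com/flink.example.com/' 1.4-ingress.yaml
sed -i 's/web-bigdata-sync.local.wangjiahuan.com/web.example.com/' 2.0-flink-streaming-platform-web.yaml
```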
# Debugging
## Flink
### init
```
kubectl -n bigdata-sync get job
kubectl -n bigdata-sync get pod|grep init
kubectl -n bigdata-sync logs -f --tail 100 job/init-flink
kubectl -n bigdata-sync exec -it init-flink-h4fwp -- bash
```
### jobmanager
```
kubectl -n bigdata-sync get deployment|grep jobmanager
kubectl -n bigdata-sync describe deployment flink-jobmanager
kubectl -n bigdata-sync get pod|grep jobmanager
kubectl -n bigdata-sync describe pod flink-jobmanager-54c64c7b88-l2fmb
kubectl -n bigdata-sync logs -f --tail 100 deployment/flink-jobmanager -c jobmanager
kubectl -n bigdata-sync exec -it flink-jobmanager-6fc5496f7-h74fz -c jobmanager -- bash
```
### taskmanager
```
kubectl -n bigdata-sync get deployment|grep taskmanager
kubectl -n bigdata-sync get pod -o wide|grep taskmanager
kubectl -n bigdata-sync logs -f --tail 100 deployment/flink-taskmanager
kubectl -n bigdata-sync logs -f --tail 100 flink-taskmanager-56cc8c749-7mjnz -c taskmanager
kubectl -n bigdata-sync exec -it flink-taskmanager-5dc5458c6d-jdnrs -c taskmanager -- bash
telnet flink-jobmanager 6123
```
## Web
### mysql
```
kubectl -n bigdata-sync get svc|grep flink-streaming-platform-web
```
### web
```
kubectl -n bigdata-sync get deployment|grep flink-streaming-platform-web
kubectl -n bigdata-sync describe deployment flink-streaming-platform-web
kubectl -n bigdata-sync get pod|grep flink-streaming-platform-web
kubectl -n bigdata-sync describe pod flink-streaming-platform-web-8cc54d699-x942d
kubectl -n bigdata-sync logs -f --tail 100 deployment/flink-streaming-platform-web -c flink-streaming-platform-web
kubectl -n bigdata-sync exec -it flink-streaming-platform-web-85878766bb-r64lc -c flink-streaming-platform-web -- bash
```
# Access
http://flink-bigdata-sync.local.wangjiahuan.com
http://web-bigdata-sync.local.wangjiahuan.com admin/123456
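If no Ingress controller is available, port-forwarding is an alternative way in (a sketch; the jobmanager Service is assumed to expose the web UI on 8081, as the init container's readiness check suggests):
```
kubectl -n bigdata-sync port-forward svc/flink-jobmanager 8081:8081
kubectl -n bigdata-sync port-forward svc/flink-streaming-platform-web 8080:8080
```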


View File

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>flink-streaming-platform-web</artifactId>
<groupId>com.streaming.platform.web</groupId>
<version>1.1</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>flink-streaming-commom</artifactId>
<dependencies>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.16.20</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.5</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.2</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,47 @@
package com.flink.streaming.common.constant;
import java.util.regex.Pattern;
/**
* @author zhuhuipei
* @Description:
* @date 2020-06-23
* @time 23:03
*/
public class SystemConstant {
public static final String COMMENT_SYMBOL = "--";
public static final String SEMICOLON = ";";
public static final String LINE_FEED = "\n";
public static final String SPACE = "";
public static final String VIRGULE = "/";
public static final int DEFAULT_PATTERN_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;
public static final String JARVERSION = "lib/flink-streaming-core-1.5.0.RELEASE.jar";
public static final String QUERY_JOBID_KEY_WORD = "job-submitted-success:";
public static final String QUERY_JOBID_KEY_WORD_BACKUP = "Job has been submitted with JobID";
public static final String JAR_ROOT_PATH = "/upload_jars/";
public static final String HADOOP_CLASSPATH = "HADOOP_CLASSPATH";
public static final String LOCAL_IP = "127.0.0.1";
}

View File

@ -0,0 +1,37 @@
package com.flink.streaming.common.enums;
import java.util.Set;
/**
* @author zhuhuipei
* @Description:
* @date 2021/3/21
* @time 20:01
*/
public enum CheckPointParameterEnums {
checkpointDir,
checkpointingMode,
checkpointInterval,
checkpointTimeout,
tolerableCheckpointFailureNumber,
asynchronousSnapshots,
externalizedCheckpointCleanup,
stateBackendType,
enableIncremental;
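// Throws if any key in the given set is not one of the supported checkpoint parameter names above.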
public static void isExits(Set<String> keys) {
for (String key : keys) {
boolean exits = false;
for (CheckPointParameterEnums checkPointParameterEnums : CheckPointParameterEnums.values()) {
if (checkPointParameterEnums.name().equalsIgnoreCase(key)) {
exits = true;
continue;
}
}
if (!exits) {
throw new RuntimeException(key + " 暂时不支持使用");
}
}
}
}

View File

@ -0,0 +1,15 @@
package com.flink.streaming.common.enums;
import lombok.Getter;
@Getter
public enum FileTypeEnum {
JAR(1);
private int code;
FileTypeEnum(int code) {
this.code = code;
}
}

View File

@ -0,0 +1,35 @@
package com.flink.streaming.common.enums;
import lombok.Getter;
/**
* @author zhuhuipei
* @Description:
* @date 2021/3/28
* @time 11:14
*/
@Getter
public enum JobTypeEnum {
SQL_STREAMING(0), JAR(1), SQL_BATCH(2);
private int code;
JobTypeEnum(int code) {
this.code = code;
}
public static JobTypeEnum getJobTypeEnum(Integer code) {
if (code == null) {
return null;
}
for (JobTypeEnum jobTypeEnum : JobTypeEnum.values()) {
if (code == jobTypeEnum.getCode()) {
return jobTypeEnum;
}
}
return null;
}
}

View File

@ -0,0 +1,131 @@
package com.flink.streaming.common.enums;
import com.flink.streaming.common.constant.SystemConstant;
import lombok.Getter;
import java.util.Optional;
import java.util.function.Function;
import java.util.regex.Pattern;
/**
* @author zhuhuipei
* @Description:
* @date 2020-06-23
* @time 02:49
*/
@Getter
public enum SqlCommand {
INSERT_INTO(
"(INSERT\\s+INTO.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
INSERT_OVERWRITE(
"(INSERT\\s+OVERWRITE.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
CREATE_TABLE(
"(CREATE\\s+TABLE.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
CREATE_FUNCTION(
"(CREATE\\s+FUNCTION.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
CREATE_VIEW(
"(CREATE\\s+VIEW.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
USE(
"(USE\\s+(?!CATALOG)(.*))",
(operands) -> Optional.of(new String[]{operands[0]})),
USE_CATALOG(
"(USE\\s+CATALOG.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
DROP_TABLE(
"(DROP\\s+TABLE.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
DROP_DATABASE(
"(DROP\\s+DATABASE.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
DROP_VIEW(
"(DROP\\s+VIEW.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
DROP_FUNCTION(
"(DROP\\s+FUNCTION.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
ALTER_TABLE(
"(ALTER\\s+TABLE.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
ALTER_DATABASE(
"(ALTER\\s+DATABASE.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
ALTER_FUNCTION(
"(ALTER\\s+FUNCTION.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
SELECT(
"(WITH.*SELECT.*|SELECT.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
SHOW_CATALOGS(
"SHOW\\s+CATALOGS",
(operands) -> Optional.of(new String[]{"SHOW CATALOGS"})),
SHOW_DATABASES(
"SHOW\\s+DATABASES",
(operands) -> Optional.of(new String[]{"SHOW DATABASES"})),
SHOW_TABLES(
"SHOW\\s+TABLES",
(operands) -> Optional.of(new String[]{"SHOW TABLES"})),
SHOW_FUNCTIONS(
"SHOW\\s+FUNCTIONS",
(operands) -> Optional.of(new String[]{"SHOW FUNCTIONS"})),
SHOW_MODULES(
"SHOW\\s+MODULES",
(operands) -> Optional.of(new String[]{"SHOW MODULES"})),
CREATE_CATALOG(
"(CREATE\\s+CATALOG.*)",
(operands) -> Optional.of(new String[]{operands[0]})),
SET(
"SET(\\s+(\\S+)\\s*=(.*))?",
(operands) -> {
if (operands.length >= 3) {
if (operands[0] == null) {
return Optional.of(new String[0]);
}
} else {
return Optional.empty();
}
return Optional.of(new String[]{operands[1], operands[2]});
}),
BEGIN_STATEMENT_SET("BEGIN\\s+STATEMENT\\s+SET",
(operands) -> Optional.of(new String[]{"BEGIN STATEMENT SET"})),
END("END", (operands) -> Optional.of(new String[]{"END"}));
private final Pattern pattern;
private final Function<String[], Optional<String[]>> operandConverter;
SqlCommand(String matchingRegex, Function<String[], Optional<String[]>> operandConverter) {
this.pattern = Pattern.compile(matchingRegex, SystemConstant.DEFAULT_PATTERN_FLAGS);
this.operandConverter = operandConverter;
}
}

View File

@ -0,0 +1,35 @@
package com.flink.streaming.common.enums;
import lombok.Getter;
import org.apache.commons.lang3.StringUtils;
/**
* @author zhuhuipei
* @Description:
* @date 2021/3/7
* @time 16:27
*/
@Getter
public enum StateBackendEnum {
MEMORY("0"), FILE("1"), ROCKSDB("2");
private String type;
StateBackendEnum(String type) {
this.type = type;
}
public static StateBackendEnum getStateBackend(String stateBackendType) {
if (StringUtils.isEmpty(stateBackendType)) {
return FILE;
}
for (StateBackendEnum stateBackendEnum : StateBackendEnum.values()) {
if (stateBackendEnum.getType().equalsIgnoreCase(stateBackendType.trim())) {
return stateBackendEnum;
}
}
throw new RuntimeException("stateBackendType值只能是 0 1 2 非法参数值" + stateBackendType);
}
}

View File

@ -0,0 +1,64 @@
package com.flink.streaming.common.model;
import com.flink.streaming.common.enums.StateBackendEnum;
import lombok.Data;
/**
* @author zhuhuipei
* @Description:
* @date 2020-08-21
* @time 23:16
*/
@Data
public class CheckPointParam {
/**
* Default: 60 s (the value is in milliseconds)
*/
private long checkpointInterval = 1000 * 60L;
/**
* Default: CheckpointingMode.EXACTLY_ONCE
*/
private String checkpointingMode = "EXACTLY_ONCE";
/**
* Default timeout: 10 minutes.
*/
private long checkpointTimeout = 10 * 60 * 1000;
/**
* Checkpoint directory
*/
private String checkpointDir;
/**
* Tolerable checkpoint failure count; default 1
*/
private int tolerableCheckpointFailureNumber = 1;
/**
* Whether snapshots are taken asynchronously
*/
private Boolean asynchronousSnapshots;
/**
* Retention policy after job cancellation: DELETE_ON_CANCELLATION deletes the checkpoint, RETAIN_ON_CANCELLATION keeps it
*/
private String externalizedCheckpointCleanup;
/**
* State backend type
*/
private StateBackendEnum stateBackendEnum;
/**
* Whether incremental checkpoints are enabled
*/
private Boolean enableIncremental;
}

View File

@ -0,0 +1,36 @@
package com.flink.streaming.common.model;
import com.flink.streaming.common.enums.SqlCommand;
import lombok.Data;
/**
* @author zhuhuipei
* @Description:
* @date 2020-06-23
* @time 02:56
*/
@Data
public class SqlCommandCall {
private SqlCommand sqlCommand;
private String[] operands;
public SqlCommandCall(SqlCommand sqlCommand, String[] operands) {
this.sqlCommand = sqlCommand;
this.operands = operands;
}
public SqlCommandCall(String[] operands) {
this.operands = operands;
}
public SqlCommand getSqlCommand() {
return sqlCommand;
}
public String[] getOperands() {
return operands;
}
}

View File

@ -0,0 +1,120 @@
package com.flink.streaming.common.sql;
import com.flink.streaming.common.constant.SystemConstant;
import com.flink.streaming.common.enums.SqlCommand;
import com.flink.streaming.common.model.SqlCommandCall;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
/**
* @author zhuhuipei
* @Description:
* @date 2020-06-23
* @time 02:22
*/
public class SqlFileParser {
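// Splits the lines of a SQL file into complete statements: "--" comment lines and blank lines
// are skipped, and buffered lines are emitted as one statement when a line ends with ";".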
public static List<String> parserSql(List<String> lineList) {
if (CollectionUtils.isEmpty(lineList)) {
throw new RuntimeException("lineList is null");
}
List<String> sqlList = new ArrayList<>();
StringBuilder stmt = new StringBuilder();
for (String line : lineList) {
//lines starting with -- are comments
if (line.trim().isEmpty() || line.startsWith(SystemConstant.COMMENT_SYMBOL)
|| trimStart(line).startsWith(SystemConstant.COMMENT_SYMBOL)) {
continue;
}
stmt.append(SystemConstant.LINE_FEED).append(line);
if (line.trim().endsWith(SystemConstant.SEMICOLON)) {
sqlList.add(stmt.substring(0, stmt.length() - 1));
//reset the statement buffer
stmt.setLength(0);
}
}
return sqlList;
}
public static List<SqlCommandCall> fileToSql(List<String> lineList) {
if (CollectionUtils.isEmpty(lineList)) {
throw new RuntimeException("lineList is null");
}
List<SqlCommandCall> sqlCommandCallList = new ArrayList<>();
StringBuilder stmt = new StringBuilder();
for (String line : lineList) {
//lines starting with -- are comments
if (line.trim().isEmpty() || line.startsWith(SystemConstant.COMMENT_SYMBOL)
|| trimStart(line).startsWith(SystemConstant.COMMENT_SYMBOL)) {
continue;
}
stmt.append(SystemConstant.LINE_FEED).append(line);
if (line.trim().endsWith(SystemConstant.SEMICOLON)) {
Optional<SqlCommandCall> optionalCall = parse(stmt.toString());
if (optionalCall.isPresent()) {
sqlCommandCallList.add(optionalCall.get());
} else {
throw new RuntimeException("不支持该语法使用" + stmt.toString() + "'");
}
stmt.setLength(0);
}
}
return sqlCommandCallList;
}
private static Optional<SqlCommandCall> parse(String stmt) {
stmt = stmt.trim();
if (stmt.endsWith(SystemConstant.SEMICOLON)) {
stmt = stmt.substring(0, stmt.length() - 1).trim();
}
for (SqlCommand cmd : SqlCommand.values()) {
final Matcher matcher = cmd.getPattern().matcher(stmt);
if (matcher.matches()) {
final String[] groups = new String[matcher.groupCount()];
for (int i = 0; i < groups.length; i++) {
groups[i] = matcher.group(i + 1);
}
return cmd.getOperandConverter().apply(groups)
.map((operands) -> new SqlCommandCall(cmd, operands));
}
}
return Optional.empty();
}
private static String trimStart(String str) {
if (StringUtils.isEmpty(str)) {
return str;
}
int start = 0;
while (start < str.length() && str.charAt(start) <= ' ') {
start++;
}
return str.substring(start);
}
}

View File

@ -0,0 +1,51 @@
package com.flink.streaming.common.utils;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author zhuhuipei
* @Description:
* @date 2022/10/21
*/
public class UrlUtils {
private static final String REG_1 = "^([hH][tT]{2}[pP]://|[hH][tT]{2}[pP][sS]://)(([A-Za-z0-9-~]+).)+([A-Za-z0-9-~\\\\/])+$";
public static boolean isHttpsOrHttp(String url) {
Pattern p = Pattern.compile(REG_1);
Matcher m = p.matcher(url.trim());
if (!m.matches()) {
return false;
}
return true;
}
public static List<String> getSqlList(String sqlUrl) {
List<String> fileList = new ArrayList<String>();
try {
URL url = new URL(sqlUrl);
InputStream in = url.openStream();
InputStreamReader isr = new InputStreamReader(in);
BufferedReader bufr = new BufferedReader(isr);
String str;
while ((str = bufr.readLine()) != null) {
fileList.add(str);
}
bufr.close();
isr.close();
in.close();
return fileList;
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
}

View File

@ -0,0 +1,40 @@
package com.flink.streaming.common;
import com.flink.streaming.common.enums.SqlCommand;
import org.junit.Test;
import java.util.Arrays;
import java.util.regex.Matcher;
/**
* @author zhuhuipei
* @Description:
* @date 2021/3/21
* @time 22:35
*/
public class TestSqlCommand {
@Test
public void testCommands() {
// testValidSqlCommand("select 'xxx', f0,f1,f2 from source_table",SqlCommand.SELECT);
testValidSqlCommand("show CATALOGS ",SqlCommand.SHOW_CATALOGS);
testValidSqlCommand(" USE CATALOGS xxx ",SqlCommand.USE);
}
private void testValidSqlCommand( String matcherStr, SqlCommand sqlCommand) {
final Matcher matcher = sqlCommand.getPattern() .matcher(matcherStr);
if (matcher.matches()) {
System.out.println("WITH 匹配成功:"+true+"matcherStr="+matcherStr);
final String[] groups = new String[matcher.groupCount()];
for (int i = 0; i < groups.length; i++) {
groups[i] = matcher.group(i + 1);
}
System.out.println("匹配到的值是:"+ Arrays.toString(groups));
} else {
System.out.println("WITH 匹配成功:"+true+"matcherStr="+matcherStr);
}
System.out.println("######################### \n \n");
}
}

View File

@ -0,0 +1,231 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>flink-streaming-platform-web</artifactId>
<groupId>com.streaming.platform.web</groupId>
<version>1.1</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>com.streaming.platform.web</groupId>
<artifactId>flink-streaming-core</artifactId>
<version>${flink_streaming_version}</version>
<dependencies>
<dependency>
<groupId>com.streaming.platform.web</groupId>
<artifactId>flink-streaming-commom</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</dependency>
<!-- Flink modules start -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>scala-library</artifactId>
<groupId>org.scala-lang</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-avro</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-common</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-scala-bridge_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>commons-cli</artifactId>
<groupId>commons-cli</groupId>
</exclusion>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>scala-parser-combinators_2.11</artifactId>
<groupId>org.scala-lang.modules</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>commons-cli</artifactId>
<groupId>commons-cli</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- rocksdb-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-statebackend-rocksdb</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- Hive Connector的支持仅在编译时生效-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-hive_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- Flink modules end -->
</dependencies>
<build>
<finalName>flink-streaming-core</finalName>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.0</version>
<configuration>
<createDependencyReducedPom>false</createDependencyReducedPom>
</configuration>
<executions>
<!-- Run shade goal on package phase -->
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<artifactSet>
<excludes>
<exclude>org.apache.flink:force-shading</exclude>
<exclude>com.google.code.findbugs:jsr305</exclude>
<!-- <exclude>org.slf4j:*</exclude>-->
<!-- <exclude>log4j:*</exclude>-->
</excludes>
</artifactSet>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,30 @@
CREATE TABLE test (
id INT NOT NULL,
name STRING,
age INT
) WITH (
'connector' = 'mysql-cdc',
'hostname' = '192.168.79.128',
'port' = '3306',
'username' = 'root',
'password' = '123456',
'database-name' = 'mydb',
'table-name' = 'test',
'scan.incremental.snapshot.enabled'='false'
);
CREATE TABLE test_sink (
id INT NOT NULL,
name STRING,
age INT,
PRIMARY KEY (id) NOT ENFORCED
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://192.168.79.128:3306/mydb?characterEncoding=UTF-8',
'table-name' = 'test_sink',
'username' = 'root',
'password' = '123456'
);
select * from test;
insert into test_sink select * from test;

View File

@ -0,0 +1,46 @@
BEGIN STATEMENT SET;
SET 'table.local-time-zone' = 'Asia/Shanghai';
CREATE CATALOG testmyhive WITH (
'type' = 'hive',
'default-database' = 'zhp',
'hive-conf-dir' = '/Users/huipeizhu/hive-conf'
);
USE CATALOG testmyhive;
drop table IF EXISTS item_test;
drop table IF EXISTS hive_flink_table;
create table item_test (
itemId BIGINT,
price BIGINT,
proctime AS PROCTIME ()
)with (
'connector' = 'kafka',
'topic' = 'flink-catalog-v1',
'properties.bootstrap.servers'='127.0.0.1:9092',
'properties.group.id'='test-1',
'format'='json',
'scan.startup.mode' = 'earliest-offset'
);
SET 'table.sql-dialect'='hive';
CREATE TABLE hive_flink_table (
itemId BIGINT,
price BIGINT,
ups string
) TBLPROPERTIES (
'sink.rolling-policy.rollover-interval'='1min',
'sink.partition-commit.trigger'='process-time',
'sink.partition-commit.policy.kind'='metastore,success-file'
);
SET 'table.sql-dialect'=default;
insert into hive_flink_table select itemId,price, 'XXXXaaa' as ups from item_test;

View File

@ -0,0 +1,110 @@
package com.flink.streaming.core;
import com.flink.streaming.common.constant.SystemConstant;
import com.flink.streaming.common.enums.JobTypeEnum;
import com.flink.streaming.common.sql.SqlFileParser;
import com.flink.streaming.common.utils.UrlUtils;
import com.flink.streaming.core.checkpoint.CheckPointParams;
import com.flink.streaming.core.checkpoint.FsCheckPoint;
import com.flink.streaming.core.execute.ExecuteSql;
import com.flink.streaming.core.model.JobRunParam;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author zhuhuipei
* @Description:
* @date 2020-06-23
* @time 00:33
*/
public class JobApplication {
private static final Logger LOG = LoggerFactory.getLogger(JobApplication.class);
public static void main(String[] args) {
try {
Arrays.stream(args).forEach(arg -> LOG.info("{}", arg));
JobRunParam jobRunParam = buildParam(args);
List<String> fileList = null;
if (UrlUtils.isHttpsOrHttp(jobRunParam.getSqlPath())) {
fileList = UrlUtils.getSqlList(jobRunParam.getSqlPath());
} else {
fileList = Files.readAllLines(Paths.get(jobRunParam.getSqlPath()));
}
List<String> sqlList = SqlFileParser.parserSql(fileList);
EnvironmentSettings settings = null;
TableEnvironment tEnv = null;
if (jobRunParam.getJobTypeEnum() != null
&& JobTypeEnum.SQL_BATCH.equals(jobRunParam.getJobTypeEnum())) {
LOG.info("[SQL_BATCH]本次任务是批任务");
//batch processing
settings = EnvironmentSettings.newInstance()
.inBatchMode()
.build();
tEnv = TableEnvironment.create(settings);
} else {
LOG.info("[SQL_STREAMING]本次任务是流任务");
//defaults to streaming mode, for compatibility with earlier versions
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
settings = EnvironmentSettings.newInstance()
.inStreamingMode()
.build();
tEnv = StreamTableEnvironment.create(env, settings);
//configure checkpointing
FsCheckPoint.setCheckpoint(env, jobRunParam.getCheckPointParam());
}
JobID jobID = ExecuteSql.exeSql(sqlList, tEnv);
System.out.println(SystemConstant.QUERY_JOBID_KEY_WORD + jobID);
LOG.info(SystemConstant.QUERY_JOBID_KEY_WORD + "{}", jobID);
} catch (Exception e) {
System.err.println("任务执行失败:" + e.getMessage());
LOG.error("任务执行失败:", e);
}
}
private static JobRunParam buildParam(String[] args) throws Exception {
ParameterTool parameterTool = ParameterTool.fromArgs(args);
String sqlPath = parameterTool.get("sql");
if (StringUtils.isEmpty(sqlPath)) {
throw new NullPointerException("-sql参数 不能为空");
}
JobRunParam jobRunParam = new JobRunParam();
jobRunParam.setSqlPath(sqlPath);
jobRunParam.setCheckPointParam(CheckPointParams.buildCheckPointParam(parameterTool));
String type = parameterTool.get("type");
if (StringUtils.isNotEmpty(type)) {
jobRunParam.setJobTypeEnum(JobTypeEnum.getJobTypeEnum(Integer.valueOf(type)));
}
return jobRunParam;
}
}
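A rough sketch of submitting the core jar by hand with the parameters that buildParam and CheckPointParams read (the working directory, SQL file path and checkpoint directory are placeholders; -type 0 means SQL_STREAMING):
```
flink run -d lib/flink-streaming-core-1.5.0.RELEASE.jar \
  -sql /tmp/job.sql \
  -type 0 \
  -checkpointDir file:///tmp/flink-checkpoints \
  -checkpointInterval 60000 \
  -stateBackendType 1
# On success the client output contains "job-submitted-success:<JobID>"
# (SystemConstant.QUERY_JOBID_KEY_WORD), which the web platform looks for.
```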

View File

@ -0,0 +1,96 @@
package com.flink.streaming.core.checkpoint;
import com.flink.streaming.common.constant.SystemConstant;
import com.flink.streaming.common.enums.CheckPointParameterEnums;
import com.flink.streaming.common.enums.StateBackendEnum;
import com.flink.streaming.common.model.CheckPointParam;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.CheckpointingMode;
/**
* @author zhuhuipei
* @Description:
* @date 2021/1/17
* @time 19:56
*/
@Slf4j
public class CheckPointParams {
/**
* Build the checkpoint parameters from the command-line arguments
*
* @author zhuhuipei
* @date 2020-08-23
* @time 22:44
*/
public static CheckPointParam buildCheckPointParam(ParameterTool parameterTool) throws Exception {
String checkpointDir = parameterTool
.get(CheckPointParameterEnums.checkpointDir.name(), SystemConstant.SPACE);
//if checkpointDir is empty, checkpointing is not enabled
if (StringUtils.isEmpty(checkpointDir)) {
return null;
}
String checkpointingMode = parameterTool.get(CheckPointParameterEnums.checkpointingMode.name(),
CheckpointingMode.EXACTLY_ONCE.name());
String checkpointInterval = parameterTool
.get(CheckPointParameterEnums.checkpointInterval.name(),
SystemConstant.SPACE);
String checkpointTimeout = parameterTool
.get(CheckPointParameterEnums.checkpointTimeout.name(), SystemConstant.SPACE);
String tolerableCheckpointFailureNumber =
parameterTool.get(CheckPointParameterEnums.tolerableCheckpointFailureNumber.name(),
SystemConstant.SPACE);
String asynchronousSnapshots = parameterTool
.get(CheckPointParameterEnums.asynchronousSnapshots.name(), SystemConstant.SPACE);
String externalizedCheckpointCleanup =
parameterTool.get(CheckPointParameterEnums.externalizedCheckpointCleanup.name(),
SystemConstant.SPACE);
String stateBackendType = parameterTool
.get(CheckPointParameterEnums.stateBackendType.name(), SystemConstant.SPACE);
String enableIncremental = parameterTool
.get(CheckPointParameterEnums.enableIncremental.name(), SystemConstant.SPACE);
CheckPointParam checkPointParam = new CheckPointParam();
if (StringUtils.isNotEmpty(asynchronousSnapshots)) {
checkPointParam.setAsynchronousSnapshots(Boolean.parseBoolean(asynchronousSnapshots));
}
checkPointParam.setCheckpointDir(checkpointDir);
checkPointParam.setCheckpointingMode(checkpointingMode);
if (StringUtils.isNotEmpty(checkpointInterval)) {
checkPointParam.setCheckpointInterval(Long.valueOf(checkpointInterval));
}
if (StringUtils.isNotEmpty(checkpointTimeout)) {
checkPointParam.setCheckpointTimeout(Long.valueOf(checkpointTimeout));
}
if (StringUtils.isNotEmpty(tolerableCheckpointFailureNumber)) {
checkPointParam
.setTolerableCheckpointFailureNumber(Integer.valueOf(tolerableCheckpointFailureNumber));
}
if (StringUtils.isNotEmpty(externalizedCheckpointCleanup)) {
checkPointParam.setExternalizedCheckpointCleanup(externalizedCheckpointCleanup);
}
checkPointParam.setStateBackendEnum(StateBackendEnum.getStateBackend(stateBackendType));
if (StringUtils.isNotEmpty(enableIncremental)) {
checkPointParam.setEnableIncremental(Boolean.parseBoolean(enableIncremental.trim()));
}
log.info("checkPointParam={}", checkPointParam);
System.out.println("checkPointParam=" + checkPointParam);
return checkPointParam;
}
}

View File

@ -0,0 +1,115 @@
package com.flink.streaming.core.checkpoint;
import com.flink.streaming.common.model.CheckPointParam;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.runtime.state.memory.MemoryStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.io.IOException;
/**
* @author zhuhuipei
* @Description:
* @date 2021/1/17
* @time 20:54
*/
@Slf4j
public class FsCheckPoint {
public static void setCheckpoint(StreamExecutionEnvironment env, CheckPointParam checkPointParam)
throws Exception {
if (checkPointParam == null) {
log.warn("############没有启用Checkpoint############");
return;
}
if (StringUtils.isEmpty(checkPointParam.getCheckpointDir())) {
throw new RuntimeException("checkpoint目录不存在");
}
// a checkpoint is taken every 60 s by default
env.enableCheckpointing(checkPointParam.getCheckpointInterval());
CheckpointConfig checkpointConfig = env.getCheckpointConfig();
//the consistency mode defaults to exactly-once
if (StringUtils.isEmpty(checkPointParam.getCheckpointingMode())
|| CheckpointingMode.EXACTLY_ONCE.name()
.equalsIgnoreCase(checkPointParam.getCheckpointingMode())) {
checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
log.info("本次CheckpointingMode模式 精确一次 即exactly-once");
} else {
checkpointConfig.setCheckpointingMode(CheckpointingMode.AT_LEAST_ONCE);
log.info("本次CheckpointingMode模式 至少一次 即AT_LEAST_ONCE");
}
//default timeout: 10 minutes
checkpointConfig.setCheckpointTimeout(checkPointParam.getCheckpointTimeout());
//ensure at least 500 ms between checkpoints (minimum pause)
checkpointConfig.setMinPauseBetweenCheckpoints(500);
//allow at most two checkpoints to be in progress at the same time
checkpointConfig.setMaxConcurrentCheckpoints(2);
//tolerable checkpoint failure count
checkpointConfig
.setTolerableCheckpointFailureNumber(checkPointParam.getTolerableCheckpointFailureNumber());
//configure the state backend
setStateBackend(env, checkPointParam);
//retention after job cancellation: DELETE_ON_CANCELLATION deletes the checkpoint, RETAIN_ON_CANCELLATION keeps it
if (checkPointParam.getExternalizedCheckpointCleanup() != null) {
if (checkPointParam.getExternalizedCheckpointCleanup().
equalsIgnoreCase(ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION.name())) {
env.getCheckpointConfig()
.enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION);
log.info("本次使用DELETE_ON_CANCELLATION代表删除");
} else if (checkPointParam.getExternalizedCheckpointCleanup().
equalsIgnoreCase(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION.name())) {
env.getCheckpointConfig()
.enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
log.info("本次使用RETAIN_ON_CANCELLATION代表保留");
}
}
}
private static void setStateBackend(StreamExecutionEnvironment env,
CheckPointParam checkPointParam) throws IOException {
switch (checkPointParam.getStateBackendEnum()) {
case MEMORY:
log.info("开启MEMORY模式");
env.setStateBackend(
new MemoryStateBackend(MemoryStateBackend.DEFAULT_MAX_STATE_SIZE * 100));
break;
case FILE:
log.info("开启FILE模式");
if (checkPointParam.getAsynchronousSnapshots() != null) {
env.setStateBackend(new FsStateBackend(checkPointParam.getCheckpointDir(),
checkPointParam.getAsynchronousSnapshots()));
} else {
env.setStateBackend(new FsStateBackend(checkPointParam.getCheckpointDir()));
}
break;
case ROCKSDB:
log.info("开启ROCKSDB模式");
if (checkPointParam.getEnableIncremental() != null) {
env.setStateBackend(new RocksDBStateBackend(checkPointParam.getCheckpointDir(),
checkPointParam.getEnableIncremental()));
} else {
env.setStateBackend(new RocksDBStateBackend(checkPointParam.getCheckpointDir()));
}
break;
default:
throw new RuntimeException("不支持这种后端状态" + checkPointParam.getStateBackendEnum());
}
}
}

View File

@ -0,0 +1,36 @@
package com.flink.streaming.core.config;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.table.api.TableEnvironment;
/**
* @author zhuhuipei
* @Description:
* @date 2021/1/17
* @time 23:57
*/
@Slf4j
public class Configurations {
/**
* Set a single Configuration entry
*
* @author zhuhuipei
* @date 2021/3/23
* @time 23:58
*/
public static void setSingleConfiguration(TableEnvironment tEnv, String key, String value) {
if (StringUtils.isEmpty(key) || StringUtils.isEmpty(value)) {
return;
}
Configuration configuration = tEnv.getConfig().getConfiguration();
log.info("#############setConfiguration#############\n key={} value={}", key, value);
configuration.setString(key, value);
}
}

View File

@ -0,0 +1,154 @@
package com.flink.streaming.core.execute;
import com.flink.streaming.common.model.SqlCommandCall;
import com.flink.streaming.core.config.Configurations;
import com.flink.streaming.core.logs.LogPrint;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.JobID;
import org.apache.flink.table.api.StatementSet;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.internal.TableEnvironmentInternal;
import org.apache.flink.table.delegation.Parser;
import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.operations.Operation;
import org.apache.flink.table.operations.SinkModifyOperation;
import org.apache.flink.table.operations.command.SetOperation;
/**
* @author zhuhuipei
* @Description:
* @date 2021/3/21
* @time 17:29
*/
@Slf4j
public class ExecuteSql {
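// Executes SHOW/DESCRIBE/EXPLAIN statements eagerly (printing their result), applies SET via
// Configurations, runs DDL through executeInternal, and collects INSERT statements
// (SinkModifyOperation) so they are submitted together as a single job whose JobID is returned.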
public static JobID exeSql(List<String> sqlList, TableEnvironment tEnv) {
Parser parser = ((TableEnvironmentInternal) tEnv).getParser();
List<ModifyOperation> modifyOperationList = new ArrayList<>();
for (String stmt : sqlList) {
Operation operation = parser.parse(stmt).get(0);
log.info("operation={}", operation.getClass().getSimpleName());
switch (operation.getClass().getSimpleName()) {
//show / describe / explain statements
case "ShowTablesOperation":
case "ShowCatalogsOperation":
case "ShowCreateTableOperation":
case "ShowCurrentCatalogOperation":
case "ShowCurrentDatabaseOperation":
case "ShowDatabasesOperation":
case "ShowFunctionsOperation":
case "ShowModulesOperation":
case "ShowPartitionsOperation":
case "ShowViewsOperation":
case "ExplainOperation":
case "DescribeTableOperation":
tEnv.executeSql(stmt).print();
break;
//set
case "SetOperation":
SetOperation setOperation = (SetOperation) operation;
Configurations.setSingleConfiguration(tEnv, setOperation.getKey().get(),
setOperation.getValue().get());
break;
case "BeginStatementSetOperation":
case "EndStatementSetOperation":
System.out.println("####stmt= " + stmt);
log.info("####stmt={}", stmt);
break;
case "DropTableOperation":
case "DropCatalogFunctionOperation":
case "DropTempSystemFunctionOperation":
case "DropCatalogOperation":
case "DropDatabaseOperation":
case "DropViewOperation":
case "CreateTableOperation":
case "CreateViewOperation":
case "CreateDatabaseOperation":
case "CreateCatalogOperation":
case "CreateTableASOperation":
case "CreateCatalogFunctionOperation":
case "CreateTempSystemFunctionOperation":
case "AlterTableOperation":
case "AlterViewOperation":
case "AlterDatabaseOperation":
case "AlterCatalogFunctionOperation":
case "UseCatalogOperation":
case "UseDatabaseOperation":
case "LoadModuleOperation":
case "UnloadModuleOperation":
case "NopOperation":
((TableEnvironmentInternal) tEnv).executeInternal(parser.parse(stmt).get(0));
break;
case "SinkModifyOperation":
modifyOperationList.add((SinkModifyOperation) operation);
break;
default:
log.error("不支持此Operation类型 {}", operation.getClass().getSimpleName());
throw new RuntimeException("不支持该语法 sql=" + stmt);
}
}
TableResult tableResult = ((TableEnvironmentInternal) tEnv)
.executeInternal(modifyOperationList);
if (tableResult.getJobClient().orElse(null) != null) {
return tableResult.getJobClient().get().getJobID();
}
throw new RuntimeException("任务运行失败 没有获取到JobID");
}
/**
* Executes SQL. Replaced by the JobID-returning exeSql(List, TableEnvironment) above.
*
* @author zhuhuipei
* @date 2021/3/21
* @time 17:33
*/
@Deprecated
public static void exeSql(List<SqlCommandCall> sqlCommandCallList, TableEnvironment tEnv,
StatementSet statementSet) {
for (SqlCommandCall sqlCommandCall : sqlCommandCallList) {
switch (sqlCommandCall.getSqlCommand()) {
//configuration
case SET:
Configurations.setSingleConfiguration(tEnv, sqlCommandCall.getOperands()[0],
sqlCommandCall.getOperands()[1]);
break;
//insert statements
case INSERT_INTO:
case INSERT_OVERWRITE:
LogPrint.logPrint(sqlCommandCall);
statementSet.addInsertSql(sqlCommandCall.getOperands()[0]);
break;
//display statements
case SELECT:
case SHOW_CATALOGS:
case SHOW_DATABASES:
case SHOW_MODULES:
case SHOW_TABLES:
LogPrint.queryRestPrint(tEnv, sqlCommandCall);
break;
// for compatibility with sql-client.sh usage: only logged, not executed
case BEGIN_STATEMENT_SET:
case END:
LogPrint.logPrint(sqlCommandCall);
break;
default:
LogPrint.logPrint(sqlCommandCall);
tEnv.executeSql(sqlCommandCall.getOperands()[0]);
break;
}
}
}
}

View File

@ -0,0 +1,74 @@
package com.flink.streaming.core.logs;
import com.flink.streaming.common.enums.SqlCommand;
import com.flink.streaming.common.model.SqlCommandCall;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.table.api.TableEnvironment;
/**
* @author zhuhuipei
* @Description:
* @date 2021/3/21
* @time 22:20
*/
@Slf4j
public class LogPrint {
/**
* Print log information for a SqlCommandCall
*
* @author zhuhuipei
* @date 2021/3/21
* @time 11:25
*/
public static void logPrint(SqlCommandCall sqlCommandCall) {
if (sqlCommandCall == null) {
throw new NullPointerException("sqlCommandCall is null");
}
switch (sqlCommandCall.getSqlCommand()) {
case SET:
System.out.println(
"\n############# " + sqlCommandCall.getSqlCommand().name() + " ############# \nSET "
+ sqlCommandCall.getOperands()[0] + "=" + sqlCommandCall.getOperands()[1]);
log.info("\n############# {} ############# \nSET{}={}",
sqlCommandCall.getSqlCommand().name(), sqlCommandCall.getOperands()[0],
sqlCommandCall.getOperands()[1]);
break;
default:
System.out.println(
"\n############# " + sqlCommandCall.getSqlCommand().name() + " ############# \n"
+ sqlCommandCall.getOperands()[0]);
log.info("\n############# {} ############# \n {}", sqlCommandCall.getSqlCommand().name(),
sqlCommandCall.getOperands()[0]);
}
}
/**
* Print the results of show / select statements
*
* @author zhuhuipei
* @date 2021/3/21
* @time 11:23
*/
public static void queryRestPrint(TableEnvironment tEnv, SqlCommandCall sqlCommandCall) {
if (sqlCommandCall == null) {
throw new NullPointerException("sqlCommandCall is null");
}
LogPrint.logPrint(sqlCommandCall);
if (sqlCommandCall.getSqlCommand().name().equalsIgnoreCase(SqlCommand.SELECT.name())) {
throw new RuntimeException("目前不支持select 语法使用");
} else {
tEnv.executeSql(sqlCommandCall.getOperands()[0]).print();
}
// if (sqlCommandCall.getSqlCommand().name().equalsIgnoreCase(SqlCommand.SELECT.name())) {
// Iterator<Row> it = tEnv.executeSql(sqlCommandCall.operands[0]).collect();
// while (it.hasNext()) {
// String res = String.join(",", PrintUtils.rowToString(it.next()));
// log.info("数据结果 {}", res);
// }
// }
}
}

View File

@ -0,0 +1,32 @@
package com.flink.streaming.core.model;
import com.flink.streaming.common.enums.JobTypeEnum;
import com.flink.streaming.common.model.CheckPointParam;
import lombok.Data;
/**
* @author zhuhuipei
* @Description:
* @date 2020-08-21
* @time 02:10
*/
@Data
public class JobRunParam {
/**
* Path of the SQL file
*/
private String sqlPath;
/**
* Job type
*/
private JobTypeEnum jobTypeEnum;
/**
* Checkpoint parameters
*/
private CheckPointParam checkPointParam;
}

View File

@ -0,0 +1,37 @@
SET 'table.local-time-zone' = 'Asia/Shanghai';
SET pipeline.name= my_Flink_job;
SHOW MODULES;
CREATE TABLE source_table (
f0 INT,
f1 INT,
f2 STRING
) WITH (
'connector' = 'datagen',
'rows-per-second'='5'
);
CREATE TABLE print_table (
f0 INT,
f1 INT,
f2 STRING
) WITH (
'connector' = 'print'
);
CREATE TABLE print_table2 (
f0 INT,
f1 INT,
f2 STRING
) WITH (
'connector' = 'print'
);
desc print_table2;
insert into print_table select f0,f1,f2 from source_table;
insert into print_table2 select f0,f1,f2 from source_table;

View File

@ -0,0 +1,43 @@
import com.flink.streaming.common.sql.SqlFileParser;
import com.flink.streaming.core.execute.ExecuteSql;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
public class Demo {
private static String test_sql_file = "/Users/liquan/workspace/git_project/flink-streaming-platform-web/flink-streaming-core/src/test.sql";
// private static String test_sql_file = "/Users/edy/git/flink-streaming-platform-web/flink-streaming-core/src/test.sql";
public static void main(String[] args) throws Exception {
EnvironmentSettings settings = null;
TableEnvironment tEnv = null;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
settings = EnvironmentSettings.newInstance()
.inStreamingMode()
.build();
tEnv = StreamTableEnvironment.create(env, settings);
List<String> fileList = Files.readAllLines(Paths.get(test_sql_file));
// List<SqlCommand> res=SqlFileParser.fileToSqlCommand(fileList,tEnv);
// System.out.println(res);
List<String> sqlList = SqlFileParser.parserSql(fileList);
System.out.println(sqlList);
ExecuteSql.exeSql(sqlList,tEnv);
}
}

View File

@ -0,0 +1,115 @@
import com.flink.streaming.core.config.Configurations;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.api.internal.TableEnvironmentInternal;
import org.apache.flink.table.delegation.Parser;
import org.apache.flink.table.operations.Operation;
import org.apache.flink.table.operations.command.SetOperation;
/**
* @author zhuhuipei
* @Description:
* @date 2020-07-20
* @time 21:36
*/
public class Test {
private static List<String> sqlList = new ArrayList<>();
static {
sqlList.add("SET 'table.local-time-zone' = 'Asia/Shanghai' ");
sqlList.add(" CREATE TABLE source_table (\n"
+ " f0 INT,\n"
+ " f1 INT,\n"
+ " f2 STRING\n"
+ " ) WITH (\n"
+ " 'connector' = 'datagen',\n"
+ " 'rows-per-second'='5'\n"
+ " )");
sqlList.add(" CREATE TABLE print_table (\n"
+ " f0 INT,\n"
+ " f1 INT,\n"
+ " f2 STRING\n"
+ " ) WITH (\n"
+ " 'connector' = 'print'\n"
+ " )");
sqlList.add("show tables");
sqlList.add("insert into print_table select f0,f1,f2 from source_table");
}
static EnvironmentSettings settings = null;
static TableEnvironment tEnv = null;
public static void main(String[] args) throws IOException {
// System.out.println(File.separator);
//
// ClassLoader threadClassLoader = Thread.currentThread().getContextClassLoader();
// System.out.println(threadClassLoader);
//
// List<String> sql = Files.readAllLines(Paths.get("D:\\ideaprojects\\flink-test\\src\\main\\resources\\online.sql"));
// List<SqlCommandCall> sqlCommandCallList = SqlFileParser.fileToSql(sql);
// for (SqlCommandCall sqlCommandCall : sqlCommandCallList) {
// LogPrint.logPrint(sqlCommandCall);
// }
// Parser parser=new ParserImpl();
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
settings = EnvironmentSettings.newInstance()
.inStreamingMode()
.build();
tEnv = StreamTableEnvironment.create(env, settings);
Parser parser = ((TableEnvironmentInternal) tEnv).getParser();
for (String sql : sqlList) {
Operation operation = parser.parse(sql).get(0);
System.out.println(operation.getClass());
switch (operation.getClass().getSimpleName()) {
case "ShowTablesOperation":
tEnv.executeSql(sql).print();
break;
case "SetOperation":
SetOperation setOperation = (SetOperation) operation;
Configurations.setSingleConfiguration(tEnv, setOperation.getKey().get(),
setOperation.getValue().get());
((TableEnvironmentInternal) tEnv).executeInternal(parser.parse(sql).get(0));
break;
default:
((TableEnvironmentInternal) tEnv).executeInternal(parser.parse(sql).get(0));
}
// if (operation instanceof ShowTablesOperation){
//
// }else{
//
// }
}
// TableResult tableResult = statementSet.execute();
//
// JobID jobID = tableResult.getJobClient().get().getJobID();
//
// System.out.println(SystemConstant.QUERY_JOBID_KEY_WORD + jobID);
// List<Operation> list =new ArrayList<>();
//
// System.out.println(list);
// for (Operation operation : list) {
// ((TableEnvironmentInternal) tEnv).executeInternal(operation);
// }
}
}

View File

@ -0,0 +1,17 @@
-- SET 'table.local-time-zone' = 'Asia/Shanghai';
SET pipeline.name= my_Flink_job;
CREATE TABLE MyTable1 (`count` bigint, word VARCHAR(256)) WITH ('connector' = 'datagen');
CREATE TABLE MyTable2 (`count` bigint, word VARCHAR(256)) WITH ('connector' = 'datagen');
EXPLAIN PLAN FOR SELECT `count`, word FROM MyTable1 WHERE word LIKE 'F%' UNION ALL
SELECT `count`, word FROM MyTable2;
EXPLAIN ESTIMATED_COST, CHANGELOG_MODE, JSON_EXECUTION_PLAN SELECT `count`, word FROM MyTable1
WHERE word LIKE 'F%'
UNION ALL
SELECT `count`, word FROM MyTable2;

View File

@ -0,0 +1,48 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>flink-streaming-platform-web</artifactId>
<groupId>com.streaming.platform.web</groupId>
<version>1.1</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>flink-streaming-validation</artifactId>
<dependencies>
<dependency>
<groupId>com.streaming.platform.web</groupId>
<artifactId>flink-streaming-commom</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>flink-table-api-scala_2.11</artifactId>
<groupId>org.apache.flink</groupId>
</exclusion>
<exclusion>
<artifactId>jsr305</artifactId>
<groupId>com.google.code.findbugs</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,23 @@
package com.flink.streaming.sql.util;
/**
* @author zhuhuipei
* @Description:
* @date 2021/4/5
* @time 10:05
*/
public class ValidationConstants {
public static final String MESSAGE_010 = "必须包含 insert or insert overwrite 语句";
public static final String MESSAGE_011 = "暂时不支持直接使用select语句请使用 insert into select 语法 或者使用 print 连接器打印结果";
public static final String TABLE_SQL_DIALECT_1 = "table.sql-dialect";
public static final String INSERT = "INSERT";
public static final String SELECT = "SELECT";
public static final String SPLIT_1 = "'";
public static final String SPACE = "";
}

View File

@ -0,0 +1,247 @@
package com.flink.streaming.sql.validation;
import com.flink.streaming.common.constant.SystemConstant;
import com.flink.streaming.common.enums.SqlCommand;
import com.flink.streaming.common.model.SqlCommandCall;
import com.flink.streaming.common.sql.SqlFileParser;
import com.flink.streaming.sql.util.ValidationConstants;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import lombok.extern.slf4j.Slf4j;
import org.apache.calcite.config.Lex;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.validate.SqlConformance;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.sql.parser.validate.FlinkSqlConformance;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.SqlDialect;
import org.apache.flink.table.api.TableConfig;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.api.config.TableConfigOptions;
import org.apache.flink.table.planner.calcite.CalciteConfig;
import org.apache.flink.table.planner.delegation.FlinkSqlParserFactories;
import org.apache.flink.table.planner.parse.CalciteParser;
import org.apache.flink.table.planner.utils.JavaScalaConversionUtil;
import org.apache.flink.table.planner.utils.TableConfigUtils;
/*
* SQL validation
* @Author: zhuhuipei
* @date 2022/6/25
*/
@Slf4j
public class SqlValidation {
public static void explainStmt(List<String> stmtList) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
EnvironmentSettings settings = EnvironmentSettings.newInstance()
.inStreamingMode()
.build();
TableEnvironment tEnv = StreamTableEnvironment.create(env, settings);
TableConfig config = tEnv.getConfig();
String sql = null;
boolean isInsertSql = false;
boolean isSelectSql = false;
try {
for (String stmt : stmtList) {
sql = stmt.trim();
boolean setSuccess = setSqlDialect(sql, config);
if (setSuccess) {
log.info("SET statement applied: sql={}", sql);
continue;
}
// Build the parser only after any SET statement may have switched the dialect.
CalciteParser parser = new CalciteParser(getSqlParserConfig(config));
SqlNode sqlNode = parser.parse(sql);
if (ValidationConstants.INSERT.equalsIgnoreCase(sqlNode.getKind().name())) {
isInsertSql = true;
}
if (ValidationConstants.SELECT.equalsIgnoreCase(sqlNode.getKind().name())) {
isSelectSql = true;
}
log.info("sql:{} 校验通过", sql);
}
} catch (Exception e) {
log.error("语法错误: {} 原因是: ", sql, e);
throw new RuntimeException("语法错误:" + sql + " 原因: " + e.getMessage());
}
if (!isInsertSql) {
throw new RuntimeException(ValidationConstants.MESSAGE_010);
}
if (isSelectSql) {
throw new RuntimeException(ValidationConstants.MESSAGE_011);
}
log.info("全部语法校验成功");
}
/**
* Legacy pre-check of a SQL script, superseded by {@link #explainStmt(List)}.
*
* @author zhuhuipei
* @date 2021/3/27
* @time 10:10
*/
@Deprecated
public static void preCheckSql(List<String> sql) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
EnvironmentSettings settings = EnvironmentSettings.newInstance()
.inStreamingMode()
.build();
TableEnvironment tEnv = StreamTableEnvironment.create(env, settings);
List<SqlCommandCall> sqlCommandCallList = SqlFileParser.fileToSql(sql);
if (CollectionUtils.isEmpty(sqlCommandCallList)) {
throw new RuntimeException("没解析出sql请检查语句 如 缺少;号");
}
TableConfig config = tEnv.getConfig();
String value = null;
boolean isInsertSql = false;
boolean isSelectSql = false;
try {
for (SqlCommandCall sqlCommandCall : sqlCommandCallList) {
value = sqlCommandCall.getOperands()[0];
switch (sqlCommandCall.getSqlCommand()) {
// SET configuration statements
case SET:
String key = sqlCommandCall.getOperands()[0];
String val = sqlCommandCall.getOperands()[1];
if (val.contains(SystemConstant.LINE_FEED)) {
throw new RuntimeException("set 语法值异常:" + val);
}
if (TableConfigOptions.TABLE_SQL_DIALECT.key().equalsIgnoreCase(key.trim())
&& SqlDialect.HIVE.name().equalsIgnoreCase(val.trim())) {
config.setSqlDialect(SqlDialect.HIVE);
} else {
config.setSqlDialect(SqlDialect.DEFAULT);
}
break;
case BEGIN_STATEMENT_SET:
case END:
break;
// all other statement types
default:
if (SqlCommand.INSERT_INTO.equals(sqlCommandCall.getSqlCommand())
|| SqlCommand.INSERT_OVERWRITE.equals(sqlCommandCall.getSqlCommand())) {
isInsertSql = true;
}
if (SqlCommand.SELECT.equals(sqlCommandCall.getSqlCommand())) {
isSelectSql = true;
}
CalciteParser parser = new CalciteParser(getSqlParserConfig(config));
parser.parse(sqlCommandCall.getOperands()[0]);
break;
}
}
} catch (Exception e) {
log.warn("语法异常: sql={} 原因是: {}", value, e);
throw new RuntimeException("语法异常 sql=" + value + " 原因: " + e.getMessage());
}
if (!isInsertSql) {
throw new RuntimeException(ValidationConstants.MESSAGE_010);
}
if (isSelectSql) {
throw new RuntimeException(ValidationConstants.MESSAGE_011);
}
}
private static SqlParser.Config getSqlParserConfig(TableConfig tableConfig) {
return JavaScalaConversionUtil.toJava(getCalciteConfig(tableConfig).getSqlParserConfig())
.orElseGet(
() -> {
SqlConformance conformance = getSqlConformance(tableConfig.getSqlDialect());
return SqlParser
.config()
.withParserFactory(FlinkSqlParserFactories.create(conformance))
.withConformance(conformance)
.withLex(Lex.JAVA)
.withIdentifierMaxLength(256);
}
);
}
private static CalciteConfig getCalciteConfig(TableConfig tableConfig) {
return TableConfigUtils.getCalciteConfig(tableConfig);
}
private static FlinkSqlConformance getSqlConformance(SqlDialect sqlDialect) {
switch (sqlDialect) {
case HIVE:
return FlinkSqlConformance.HIVE;
case DEFAULT:
return FlinkSqlConformance.DEFAULT;
default:
throw new TableException("Unsupported SQL dialect: " + sqlDialect);
}
}
/**
* Splits a raw SQL string on line feeds into a list of statement fragments.
*/
public static List<String> toSqlList(String sql) {
if (StringUtils.isEmpty(sql)) {
return Collections.emptyList();
}
return Arrays.asList(sql.split(SystemConstant.LINE_FEED));
}
/**
* Applies a SET statement: switches the SQL dialect for the "table.sql-dialect" key, otherwise
* writes the key/value pair into the table configuration.
*
* @param sql         the statement to inspect
* @param tableConfig the table configuration to update
* @return true if the statement was a SET statement and was applied, false otherwise
* @author zhuhuipei
* @date 2022/6/24
*/
private static Boolean setSqlDialect(String sql, TableConfig tableConfig) {
final Matcher matcher = SqlCommand.SET.getPattern().matcher(sql);
if (matcher.matches()) {
final String[] groups = new String[matcher.groupCount()];
for (int i = 0; i < groups.length; i++) {
groups[i] = matcher.group(i + 1);
}
String key = groups[1].replace(ValidationConstants.SPLIT_1, ValidationConstants.SPACE).trim();
String val = groups[2];
if (ValidationConstants.TABLE_SQL_DIALECT_1.equalsIgnoreCase(key)) {
if (SqlDialect.HIVE.name().equalsIgnoreCase(
val.replace(ValidationConstants.SPLIT_1, ValidationConstants.SPACE).trim())) {
tableConfig.setSqlDialect(SqlDialect.HIVE);
} else {
tableConfig.setSqlDialect(SqlDialect.DEFAULT);
}
} else {
Configuration configuration = tableConfig.getConfiguration();
configuration.setString(key, val);
}
return true;
}
return false;
}
}
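
A minimal usage sketch (not part of this commit) of how SqlValidation.explainStmt might be driven. The class name, the inline statements, and the assumption that each entry is one complete statement without a trailing ';' are illustrative only; the real entry point in the project may differ.

package com.flink.streaming.sql.validation;

import java.util.Arrays;
import java.util.List;

public class SqlValidationExample {

    public static void main(String[] args) {
        // Each entry is one complete statement with the trailing ';' already removed.
        List<String> stmtList = Arrays.asList(
                "SET 'table.sql-dialect' = 'default'",
                "CREATE TABLE sink_print (id BIGINT) WITH ('connector' = 'print')",
                "INSERT INTO sink_print SELECT CAST(1 AS BIGINT)"
        );
        // Throws a RuntimeException naming the offending statement when validation fails.
        SqlValidation.explainStmt(stmtList);
    }
}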

View File

@ -0,0 +1,30 @@
package com.flink.streaming.sql.validation.test;
import com.flink.streaming.common.sql.SqlFileParser;
import com.flink.streaming.sql.validation.SqlValidation;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import org.junit.Test;
/**
* Smoke test that runs a local SQL file through SqlValidation.
*
* @author zhuhuipei
* @date 2021/1/17
* @time 22:30
*/
public class TestSqlValidation {
private static String test_sql_file = "/Users/liquan/workspace/git_project/flink-streaming-platform-web/flink-streaming-core/src/hive-test.sql";
// private static String test_sql_file = "/Users/edy/git/flink-streaming-platform-web/flink-streaming-core/src/hive-test.sql";
@Test
public void checkSql() throws IOException {
List<String> list = Files.readAllLines(Paths.get(test_sql_file));
List<String> sqlList = SqlFileParser.parserSql(list);
SqlValidation.explainStmt(sqlList);
//SqlValidation.preCheckSql(list);
}
}

View File

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>flink-streaming-platform-web</artifactId>
<groupId>com.streaming.platform.web</groupId>
<version>1.1</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>flink-streaming-web-alarm</artifactId>
<dependencies>
<dependency>
<groupId>com.streaming.platform.web</groupId>
<artifactId>flink-streaming-web-common</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,19 @@
package com.flink.streaming.web.alarm;
/**
* DingTalk robot alarm sender.
*
* @author zhuhuipei
* @date 2020-09-23
* @time 23:59
*/
public interface DingDingAlarm {
/**
* Sends an alarm message to a DingTalk robot webhook.
*
* @param url     the webhook address
* @param content the message body
* @return true if the alarm was delivered
* @author zhuhuipei
* @date 2020-09-25
* @time 23:02
*/
boolean send(String url, String content);
}
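
A hedged sketch (not part of this commit) of one possible DingDingAlarm implementation: it posts the DingTalk custom-robot "text" payload using Java 11's java.net.http.HttpClient. The class name, the missing JSON escaping of the content, and the "HTTP 200 means success" convention are illustrative assumptions, not the project's actual implementation.

package com.flink.streaming.web.alarm;

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;

public class SimpleDingDingAlarm implements DingDingAlarm {

    private final HttpClient client = HttpClient.newHttpClient();

    @Override
    public boolean send(String url, String content) {
        // DingTalk custom-robot "text" message; content is embedded as-is,
        // so production code should JSON-escape it first.
        String body = "{\"msgtype\":\"text\",\"text\":{\"content\":\"" + content + "\"}}";
        HttpRequest request = HttpRequest.newBuilder(URI.create(url))
                .header("Content-Type", "application/json;charset=utf-8")
                .POST(HttpRequest.BodyPublishers.ofString(body, StandardCharsets.UTF_8))
                .build();
        try {
            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
            return response.statusCode() == 200;
        } catch (Exception e) {
            return false;
        }
    }
}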

View File

@ -0,0 +1,21 @@
package com.flink.streaming.web.alarm;
import com.flink.streaming.web.model.vo.CallbackDTO;
/**
* Generic HTTP callback alarm sender.
*
* @author zhuhuipei
* @date 2021/2/21
* @time 11:38
*/
public interface HttpAlarm {
/**
* Posts the callback payload to the given HTTP URL.
*
* @param url         the callback address
* @param callbackDTO the payload to deliver
* @return true if the callback succeeded
* @author zhuhuipei
* @date 2021/2/21
* @time 11:39
*/
boolean send(String url, CallbackDTO callbackDTO);
}
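
For symmetry, a hedged sketch of an HttpAlarm implementation (not part of this commit): it serializes the CallbackDTO with Jackson and POSTs it to the callback URL. The use of Jackson, the class name, and the "HTTP 200 means success" convention are assumptions for illustration only.

package com.flink.streaming.web.alarm;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.flink.streaming.web.model.vo.CallbackDTO;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class SimpleHttpAlarm implements HttpAlarm {

    private final HttpClient client = HttpClient.newHttpClient();
    private final ObjectMapper mapper = new ObjectMapper();

    @Override
    public boolean send(String url, CallbackDTO callbackDTO) {
        try {
            // Serialize the callback payload and POST it to the configured endpoint.
            String body = mapper.writeValueAsString(callbackDTO);
            HttpRequest request = HttpRequest.newBuilder(URI.create(url))
                    .header("Content-Type", "application/json")
                    .POST(HttpRequest.BodyPublishers.ofString(body))
                    .build();
            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
            return response.statusCode() == 200;
        } catch (Exception e) {
            return false;
        }
    }
}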

Some files were not shown because too many files have changed in this diff.