文章
jsqlparser 表级血缘关系获取
适用于INSERT语句,明确需要目标表以及来源表的场景
pom.xml
<dependencies>
<!-- JSqlParser: SQL 解析 -->
<dependency>
<groupId>com.github.jsqlparser</groupId>
<artifactId>jsqlparser</artifactId>
<version>4.9</version>
</dependency>
</dependencies>pojo
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.List;
@Data
@NoArgsConstructor
@AllArgsConstructor
public class TableLineage {
private String targetTable;
private List<String> sourceTables;
}main
import net.sf.jsqlparser.JSQLParserException;
import net.sf.jsqlparser.parser.CCJSqlParserUtil;
import net.sf.jsqlparser.statement.Statement;
import net.sf.jsqlparser.statement.insert.Insert;
import net.sf.jsqlparser.schema.Table;
import net.sf.jsqlparser.util.TablesNamesFinder;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class SqlLineageParser {
/**
* 解析SQL语句,提取目标表和源表信息
* @param sql 待解析的SQL语句
* @return TableLineage POJO对象,包含目标表和源表列表
* @throws JSQLParserException 如果SQL解析失败
*/
public static TableLineage getTableLineage(String sql) throws JSQLParserException {
// 预处理SQL
// 替换OVERWRITE TABLE
String processedSql = sql.replaceAll("(?i)OVERWRITE\\s+TABLE", "INTO");
// 替换 PARTITION语法
processedSql = processedSql.replaceAll("(?i)\\s+PARTITION\\s*\\([^)]*\\)", "");
// 使用TablesNamesFinder获取所有表名
Set<String> allTableNames = TablesNamesFinder.findTables(processedSql);
// System.out.println("All tables found: " + allTableNames);
// 解析SQL语句
Statement statement = CCJSqlParserUtil.parse(processedSql);
if (!(statement instanceof Insert)) {
throw new IllegalArgumentException("Only INSERT statements are supported");
}
Insert insert = (Insert) statement;
String targetTable = insert.getTable().getFullyQualifiedName();
Set<String> allTables = TablesNamesFinder.findTables(processedSql);
allTables.remove(targetTable); // 移除目标表,剩余为源表
// 创建并返回POJO对象
return new TableLineage(targetTable, new ArrayList<>(allTables));
}
}