大数据

jsqlparser 表级血缘关系获取

适用于INSERT语句,明确需要目标表以及来源表的场景

pom.xml

<dependencies>
        <!-- JSqlParser: SQL 解析 -->
        <dependency>
            <groupId>com.github.jsqlparser</groupId>
            <artifactId>jsqlparser</artifactId>
            <version>4.9</version>
        </dependency>
</dependencies>

pojo

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.util.List;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class TableLineage {
    private String targetTable;
    private List<String> sourceTables;
}

main

import net.sf.jsqlparser.JSQLParserException;
import net.sf.jsqlparser.parser.CCJSqlParserUtil;
import net.sf.jsqlparser.statement.Statement;
import net.sf.jsqlparser.statement.insert.Insert;
import net.sf.jsqlparser.schema.Table;
import net.sf.jsqlparser.util.TablesNamesFinder;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class SqlLineageParser {

    /**
     * 解析SQL语句,提取目标表和源表信息
     * @param sql 待解析的SQL语句
     * @return TableLineage POJO对象,包含目标表和源表列表
     * @throws JSQLParserException 如果SQL解析失败
     */
    public static TableLineage getTableLineage(String sql) throws JSQLParserException {
        // 预处理SQL
        // 替换OVERWRITE TABLE
        String processedSql = sql.replaceAll("(?i)OVERWRITE\\s+TABLE", "INTO");
        // 替换 PARTITION语法
        processedSql = processedSql.replaceAll("(?i)\\s+PARTITION\\s*\\([^)]*\\)", "");

        // 使用TablesNamesFinder获取所有表名
        Set<String> allTableNames = TablesNamesFinder.findTables(processedSql);
        // System.out.println("All tables found: " + allTableNames);

        // 解析SQL语句
        Statement statement = CCJSqlParserUtil.parse(processedSql);

        if (!(statement instanceof Insert)) {
            throw new IllegalArgumentException("Only INSERT statements are supported");
        }

        Insert insert = (Insert) statement;
        String targetTable = insert.getTable().getFullyQualifiedName();
        Set<String> allTables = TablesNamesFinder.findTables(processedSql);
        allTables.remove(targetTable); // 移除目标表,剩余为源表

        // 创建并返回POJO对象
        return new TableLineage(targetTable, new ArrayList<>(allTables));
    }
}