java 图片识别

需求

公司需求需要识别工单图片，后续再对识别到的内容入库。
在网上查了一下开源方案，Java 可用的主要有两个：

Tess4J
RapidOcr-java：https://github.com/MyMonsterCat/RapidOcr-Java

两者使用都很简单，但是前者识别率很低，本文主要介绍后者

引入依赖

maven 配置

<!--  The core rapidocr artifact; this dependency is mandatory.     -->
<!--  Available versions are listed on Maven Central: https://central.sonatype.com/artifact/io.github.mymonstercat/rapidocr/versions      -->
<!--  ${rapidocr.version} is a placeholder: define it as a Maven property or replace it with a concrete version.      -->
<dependency>
    <groupId>io.github.mymonstercat</groupId>
    <artifactId>rapidocr</artifactId>
    <version>${rapidocr.version}</version>
</dependency>

<!--  Normally only one platform artifact is needed: onnx is recommended on CPU, ncnn on mobile.     -->
<!--  Available versions are listed on Maven Central: https://central.sonatype.com/artifact/io.github.mymonstercat/rapidocr-onnx-platform/versions      -->
<!--  ${rapidocr-onnx-platform.version} is a placeholder: define it as a Maven property or replace it with a concrete version.      -->
<dependency>
    <groupId>io.github.mymonstercat</groupId>
    <artifactId>rapidocr-onnx-platform</artifactId>
    <version>${rapidocr-onnx-platform.version}</version>
</dependency>

基本使用

使用起来很简单，代码如下

// Configure the tunable parameters, starting from the library's default configuration.
ParamConfig paramConfig = ParamConfig.getDefaultConfig();
// NOTE(review): doAngle / mostAngle presumably control text-direction detection — confirm against the RapidOcr-Java docs.
paramConfig.setDoAngle(true);
paramConfig.setMostAngle(true);
// Obtain an engine for the ONNX PP-OCR v3 model.
InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V3);
// Run recognition. `path` is not defined in this snippet; presumably the path of the image file — supply it from the caller.
OcrResult ocrResult = engine.runOcr(path, paramConfig);

进阶

图片上所有汉字均会识别出来，如何定位到需要的地方？

可根据坐标来识别相同行，y 轴误差在一定范围内，视为同一行，然后再按 x 轴排序

/**
 * Groups OCR text blocks into rows and returns each row's text ordered left to right.
 *
 * <p>Blocks are visited in the order given. A block joins the current row when the y coordinate
 * of its first box point differs from that of the previous block in the row by at most the
 * tolerance returned by {@code getYDiff(textBlocks)}; otherwise it starts a new row. Only
 * consecutive blocks are compared, so the input is assumed to arrive roughly top to bottom
 * (TODO confirm against the OCR engine's output order).
 *
 * @param textBlocks recognition result; {@code null} or empty yields an empty list
 * @return one list per row, holding the trimmed block texts sorted by the x coordinate of each
 *         block's first box point
 */
private List<List<String>> filterGroupRowData(List<TextBlock> textBlocks) {
    List<List<String>> rowTexts = new ArrayList<>();
    if (textBlocks == null || textBlocks.isEmpty()) {
        return rowTexts;
    }

    int yDiff = getYDiff(textBlocks);

    // Collect consecutive blocks whose y coordinates are within the tolerance into the same row.
    List<List<TextBlock>> groupedRows = new ArrayList<>();
    List<TextBlock> currentRow = new ArrayList<>();
    for (TextBlock textBlock : textBlocks) {
        int y = textBlock.getBoxPoint().get(0).getY();
        if (!currentRow.isEmpty()) {
            TextBlock previous = currentRow.get(currentRow.size() - 1);
            int previousY = previous.getBoxPoint().get(0).getY();
            if (Math.abs(y - previousY) > yDiff) {
                // Too far from the previous block: close the current row and open a new one.
                groupedRows.add(currentRow);
                currentRow = new ArrayList<>();
            }
        }
        currentRow.add(textBlock);
    }
    // The input is non-empty here, so the last row always holds at least one block.
    groupedRows.add(currentRow);

    // Order every row by x coordinate, then keep only the trimmed text.
    // The sort is done explicitly instead of inside Stream.peek, which is not meant for side effects.
    for (List<TextBlock> row : groupedRows) {
        row.sort(Comparator.comparingInt(t -> t.getBoxPoint().get(0).getX()));
        rowTexts.add(row.stream().map(t -> t.getText().trim()).collect(Collectors.toList()));
    }
    return rowTexts;
}

/**
 * Derives the y-axis tolerance used to decide whether two text blocks lie on the same row.
 *
 * <p>The tolerance is the absolute vertical distance between the first box points of the first
 * two blocks. The absolute value matters: a signed difference would be negative whenever the
 * second block sits above the first, and a negative tolerance can never be satisfied by an
 * {@code Math.abs(...) <= tolerance} comparison, so no blocks would ever be grouped.
 *
 * @param textBlocks recognition result
 * @return the non-negative tolerance, or {@code 0} when fewer than two blocks are available
 */
private int getYDiff(List<TextBlock> textBlocks) {
    if (textBlocks == null || textBlocks.size() < 2) {
        return 0;
    }
    Point firstPoint = textBlocks.get(0).getBoxPoint().get(0);
    Point secondPoint = textBlocks.get(1).getBoxPoint().get(0);
    // Coordinates are ints, so no rounding step is needed.
    return Math.abs(secondPoint.getY() - firstPoint.getY());
}

绘制图片，补充矩形框底下文字

/**
 * Image helper that draws OCR results (bounding boxes and recognized text) onto a picture.
 * Adapted from <a href="https://github.com/nn200433/tika-server/blob/main/tika-server-paddle-ocr/src/main/java/cn/nn200433/tika/parser/ocr/paddle/utils/ImageUtil.java">ImageUtil</a>
 */
@Slf4j
public class ImageUtil extends ImgUtil {

    /** Vertical gap, in pixels, between the bottom edge of a box and the baseline of its label. */
    private static final int LABEL_OFFSET_Y = 15;

    /**
     * Draws the recognized blocks onto an image.
     * <p>
     * For every text block a red rectangle is drawn around its box points and the recognized
     * text is written in black below the rectangle's bottom-left corner.
     * </p>
     *
     * @param stream    input stream containing the source image
     * @param blockList text blocks to draw
     * @return {@link ByteArrayOutputStream} holding the annotated image, written with
     *         {@code IMAGE_TYPE_PNG}
     * @author song_jx
     */
    public static ByteArrayOutputStream drawImg(InputStream stream, List<TextBlock> blockList) {
        // Decode the picture from the input stream.
        BufferedImage image = read(stream);

        Graphics2D g2d = image.createGraphics();
        try {
            for (final TextBlock textBlock : blockList) {
                // 1. Axis-aligned rectangle enclosing the block's four corner points.
                final Rectangle box = calcRectangle(textBlock.getBoxPoint());
                // 2. Outline the rectangle.
                g2d.setColor(Color.RED);
                g2d.drawRect(box.x, box.y, box.width, box.height);
                // 3. Write the recognized text just below the bottom-left corner.
                g2d.setColor(Color.BLACK);
                g2d.drawString(textBlock.getText(), box.x, box.y + box.height + LABEL_OFFSET_Y);
            }
        } finally {
            // Release the graphics context even when drawing fails (e.g. a malformed box).
            g2d.dispose();
        }

        ByteArrayOutputStream os = new ByteArrayOutputStream();
        write(image, IMAGE_TYPE_PNG, os);
        return os;
    }

    /**
     * Draws the recognized blocks onto the image stored in a file.
     * <p>
     * Opens the file and delegates to {@link #drawImg(InputStream, List)}. Any exception is
     * logged and not rethrown.
     * </p>
     *
     * @param imageFile image file
     * @param blockList text blocks to draw
     * @return {@link ByteArrayOutputStream} holding the annotated image, or {@code null} when
     *         reading or drawing failed
     * @author song_jx
     */
    public static ByteArrayOutputStream drawImg(File imageFile, List<TextBlock> blockList) {
        ByteArrayOutputStream os = null;
        try (InputStream is = new FileInputStream(imageFile)) {
            os = drawImg(is, blockList);
        } catch (Exception e) {
            // Best effort: callers receive null and must check for it.
            log.error("图片绘制异常", e);
        }
        return os;
    }

    /**
     * Computes the axis-aligned bounding rectangle of four points.
     *
     * <p>The rectangle's origin is the smallest x and y among the points; its width and height
     * are the spans between the smallest and largest x and y.</p>
     *
     * @param pointList exactly four points; anything else fails the assertion
     * @return {@link Rectangle } enclosing all four points
     * @author song_jx
     */
    private static Rectangle calcRectangle(List<Point> pointList) {
        Assert.isFalse(CollUtil.isEmpty(pointList) || pointList.size() != 4, "需要4个点来构成矩形");
        int minX = Integer.MAX_VALUE;
        int minY = Integer.MAX_VALUE;
        int maxX = Integer.MIN_VALUE;
        int maxY = Integer.MIN_VALUE;
        // Track the extreme x and y coordinates across all points.
        for (Point point : pointList) {
            minX = Math.min(minX, point.getX());
            minY = Math.min(minY, point.getY());
            maxX = Math.max(maxX, point.getX());
            maxY = Math.max(maxY, point.getY());
        }
        // The top-left corner is (minX, minY).
        return new Rectangle(minX, minY, maxX - minX, maxY - minY);
    }

}