PDF文書からファイル名を自動生成!Javaプログラムで業務効率アップ

PDF文書からファイル名を自動生成!Javaプログラムで業務効率アップ 無料ツール
この記事は約21分で読めます。

作成・設定手順

Javaで処理を担当するプログラムを作成する

package com.rinTech.pdfPageCustomNamer;

import java.awt.geom.Rectangle2D;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import java.io.File;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.text.PDFTextStripperByArea;

/**
 * Splits a PDF into one single-page PDF per page and names each output file
 * after the report number printed on that page.
 *
 * <p>Usage: {@code SplitPdfAndRename [sourcePdf] [outputDirectory]}. Blank or
 * missing arguments fall back to the built-in defaults below.
 *
 * <p>For every page, the text inside a fixed rectangle of the page is read and
 * searched for the label {@code 日報番号}. The digits and hyphens found in the
 * 30 characters starting at the label become the file name. When no number is
 * found, or a file with that name already exists, a timestamp-based name is
 * used instead so that no existing file is overwritten.
 */
public class SplitPdfAndRename {

    /** Source PDF used when the first argument is missing or blank. */
    private static final String DEFAULT_SOURCE_PDF = "D:\\日報\\作業日報.pdf";

    /** Output directory used when the second argument is missing or blank. */
    private static final String DEFAULT_OUTPUT_DIR = "D:\\日報\\";

    /** Label that precedes the report number; at least one character must follow it on the same line. */
    private static final Pattern REPORT_NUMBER_LABEL = Pattern.compile("日報番号(.+?)");

    /** Number of characters, counted from the start of the label, that are scanned for digits. */
    private static final int LABEL_WINDOW = 30;

    /** Format of the fallback file name. */
    private static final DateTimeFormatter TIMESTAMP =
            DateTimeFormatter.ofPattern("yyyyMMddHHmmssSSSSSS");

    /**
     * Entry point.
     *
     * @param args {@code args[0]} = path of the PDF to split (optional),
     *             {@code args[1]} = directory that receives the page files (optional)
     */
    public static void main(String[] args) {
        String sourcePdfPath = argOrDefault(args, 0, DEFAULT_SOURCE_PDF);
        String newPdfPath = argOrDefault(args, 1, DEFAULT_OUTPUT_DIR);

        try (PDDocument document = PDDocument.load(new File(sourcePdfPath))) {
            int pageNumber = 0;
            for (PDPage page : document.getPages()) {
                pageNumber++;
                savePageAsPdf(page, pageNumber, newPdfPath);
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("Failed to process " + sourcePdfPath);
            e.printStackTrace();
        }
    }

    /** Returns the trimmed argument at {@code index}, or {@code fallback} when it is absent or blank. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args.length > index) {
            String value = args[index].trim();
            if (!value.isEmpty()) {
                return value;
            }
        }
        return fallback;
    }

    /**
     * Writes one page into its own PDF inside {@code newPdfPath}.
     *
     * <p>Failures (including a failure while closing the temporary document)
     * are reported and swallowed here so that one bad page does not stop the
     * remaining pages from being exported.
     */
    private static void savePageAsPdf(PDPage page, int pageNumber, String newPdfPath) {
        try (PDDocument newDoc = new PDDocument()) {
            newDoc.addPage(page);
            String extractedText = extractTextFromPage(page);
            String specificText = extractSpecificText(newPdfPath, extractedText);
            // File(parent, child) inserts the platform separator itself, so the
            // directory may be given with or without a trailing separator.
            newDoc.save(new File(newPdfPath, specificText + ".pdf"));
        } catch (IOException | RuntimeException e) {
            System.err.println("Failed to export page " + pageNumber);
            e.printStackTrace();
        }
    }

    /**
     * Reads the text inside the rectangle x=0, y=0, width=200, height=100 of the page.
     *
     * @param page page to read
     * @return the text PDFBox reports for that region
     * @throws IOException if PDFBox fails to process the page content
     */
    private static String extractTextFromPage(PDPage page) throws IOException {
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        stripper.setSortByPosition(true);
        String regionName = "region";
        stripper.addRegion(regionName, new Rectangle2D.Double(0, 0, 200, 100));
        stripper.extractRegions(page);
        return stripper.getTextForRegion(regionName);
    }

    /**
     * Chooses the base file name (without extension) for a page.
     *
     * @param newPdfPath output directory, used to avoid clashing with existing files
     * @param text       text extracted from the page
     * @return the report number when one was found and is still unused in
     *         {@code newPdfPath}; otherwise a unique timestamp-based name. Never empty.
     */
    private static String extractSpecificText(String newPdfPath, String text) {
        String reportNumber = findReportNumber(text);
        if (!reportNumber.isEmpty() && !new File(newPdfPath, reportNumber + ".pdf").exists()) {
            return reportNumber;
        }
        return uniqueTimestampName(newPdfPath);
    }

    /** Returns the digits and hyphens found near the report-number label, or "" when there are none. */
    private static String findReportNumber(String text) {
        if (text == null) {
            return "";
        }
        Matcher matcher = REPORT_NUMBER_LABEL.matcher(text);
        if (!matcher.find()) {
            return "";
        }
        int start = matcher.start();
        int end = Math.min(start + LABEL_WINDOW, text.length());
        return text.substring(start, end).replaceAll("[^0-9-]", "");
    }

    /**
     * Builds a timestamp name that does not collide with an existing file.
     * The clock may not advance between two pages, so a counter suffix is
     * appended when the plain timestamp is already taken.
     */
    private static String uniqueTimestampName(String newPdfPath) {
        String base = LocalDateTime.now().format(TIMESTAMP);
        String candidate = base;
        for (int suffix = 1; new File(newPdfPath, candidate + ".pdf").exists(); suffix++) {
            candidate = base + "-" + suffix;
        }
        return candidate;
    }

}

Javaで画面を担当するプログラムを作成する

package com.rinTech.pdfPageCustomNamer;

import java.awt.geom.Rectangle2D;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import java.io.File;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.text.PDFTextStripperByArea;

/**
 * Splits a PDF into one single-page PDF per page and names each output file
 * after the report number printed on that page.
 *
 * <p>NOTE(review): this listing appears under the heading for the screen (GUI)
 * program, yet it declares the same class as the processing program and
 * contains no UI code. The build configuration names
 * {@code SplitPdfAndRenameF} as the main class — confirm whether a different
 * source file was meant to be shown here.
 *
 * <p>Usage: {@code SplitPdfAndRename [sourcePdf] [outputDirectory]}. Blank or
 * missing arguments fall back to the built-in defaults below.
 *
 * <p>For every page, the text inside a fixed rectangle of the page is read and
 * searched for the label {@code 日報番号}. The digits and hyphens found in the
 * 30 characters starting at the label become the file name. When no number is
 * found, or a file with that name already exists, a timestamp-based name is
 * used instead so that no existing file is overwritten.
 */
public class SplitPdfAndRename {

    /** Source PDF used when the first argument is missing or blank. */
    private static final String DEFAULT_SOURCE_PDF = "D:\\日報\\作業日報.pdf";

    /** Output directory used when the second argument is missing or blank. */
    private static final String DEFAULT_OUTPUT_DIR = "D:\\日報\\";

    /** Label that precedes the report number; at least one character must follow it on the same line. */
    private static final Pattern REPORT_NUMBER_LABEL = Pattern.compile("日報番号(.+?)");

    /** Number of characters, counted from the start of the label, that are scanned for digits. */
    private static final int LABEL_WINDOW = 30;

    /** Format of the fallback file name. */
    private static final DateTimeFormatter TIMESTAMP =
            DateTimeFormatter.ofPattern("yyyyMMddHHmmssSSSSSS");

    /**
     * Entry point.
     *
     * @param args {@code args[0]} = path of the PDF to split (optional),
     *             {@code args[1]} = directory that receives the page files (optional)
     */
    public static void main(String[] args) {
        String sourcePdfPath = argOrDefault(args, 0, DEFAULT_SOURCE_PDF);
        String newPdfPath = argOrDefault(args, 1, DEFAULT_OUTPUT_DIR);

        try (PDDocument document = PDDocument.load(new File(sourcePdfPath))) {
            int pageNumber = 0;
            for (PDPage page : document.getPages()) {
                pageNumber++;
                savePageAsPdf(page, pageNumber, newPdfPath);
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("Failed to process " + sourcePdfPath);
            e.printStackTrace();
        }
    }

    /** Returns the trimmed argument at {@code index}, or {@code fallback} when it is absent or blank. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args.length > index) {
            String value = args[index].trim();
            if (!value.isEmpty()) {
                return value;
            }
        }
        return fallback;
    }

    /**
     * Writes one page into its own PDF inside {@code newPdfPath}.
     *
     * <p>Failures (including a failure while closing the temporary document)
     * are reported and swallowed here so that one bad page does not stop the
     * remaining pages from being exported.
     */
    private static void savePageAsPdf(PDPage page, int pageNumber, String newPdfPath) {
        try (PDDocument newDoc = new PDDocument()) {
            newDoc.addPage(page);
            String extractedText = extractTextFromPage(page);
            String specificText = extractSpecificText(newPdfPath, extractedText);
            // File(parent, child) inserts the platform separator itself, so the
            // directory may be given with or without a trailing separator.
            newDoc.save(new File(newPdfPath, specificText + ".pdf"));
        } catch (IOException | RuntimeException e) {
            System.err.println("Failed to export page " + pageNumber);
            e.printStackTrace();
        }
    }

    /**
     * Reads the text inside the rectangle x=0, y=0, width=200, height=100 of the page.
     *
     * @param page page to read
     * @return the text PDFBox reports for that region
     * @throws IOException if PDFBox fails to process the page content
     */
    private static String extractTextFromPage(PDPage page) throws IOException {
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        stripper.setSortByPosition(true);
        String regionName = "region";
        stripper.addRegion(regionName, new Rectangle2D.Double(0, 0, 200, 100));
        stripper.extractRegions(page);
        return stripper.getTextForRegion(regionName);
    }

    /**
     * Chooses the base file name (without extension) for a page.
     *
     * @param newPdfPath output directory, used to avoid clashing with existing files
     * @param text       text extracted from the page
     * @return the report number when one was found and is still unused in
     *         {@code newPdfPath}; otherwise a unique timestamp-based name. Never empty.
     */
    private static String extractSpecificText(String newPdfPath, String text) {
        String reportNumber = findReportNumber(text);
        if (!reportNumber.isEmpty() && !new File(newPdfPath, reportNumber + ".pdf").exists()) {
            return reportNumber;
        }
        return uniqueTimestampName(newPdfPath);
    }

    /** Returns the digits and hyphens found near the report-number label, or "" when there are none. */
    private static String findReportNumber(String text) {
        if (text == null) {
            return "";
        }
        Matcher matcher = REPORT_NUMBER_LABEL.matcher(text);
        if (!matcher.find()) {
            return "";
        }
        int start = matcher.start();
        int end = Math.min(start + LABEL_WINDOW, text.length());
        return text.substring(start, end).replaceAll("[^0-9-]", "");
    }

    /**
     * Builds a timestamp name that does not collide with an existing file.
     * The clock may not advance between two pages, so a counter suffix is
     * appended when the plain timestamp is already taken.
     */
    private static String uniqueTimestampName(String newPdfPath) {
        String base = LocalDateTime.now().format(TIMESTAMP);
        String candidate = base;
        for (int suffix = 1; new File(newPdfPath, candidate + ".pdf").exists(); suffix++) {
            candidate = base + "-" + suffix;
        }
        return candidate;
    }

}

Mavenのpom.xmlを作成する

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Build for PdfPageCustomNamer: compiles the sources for Java 8, bundles them
  with their dependencies into a single runnable jar (maven-shade-plugin) and
  wraps that jar into a Windows executable (launch4j-maven-plugin).
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.rinTech</groupId>
    <artifactId>PdfPageCustomNamer</artifactId>
    <version>1.0</version>
    <packaging>jar</packaging>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <!-- NOTE(review): the main class is SplitPdfAndRenameF; confirm that this class exists in the project. -->
        <exec.mainClass>com.rinTech.pdfPageCustomNamer.SplitPdfAndRenameF</exec.mainClass>
    </properties>
    <dependencies>
        <!--
          The sources import org.apache.pdfbox.*, so PDFBox must be on the
          compile classpath. Stay on the 2.0.x line: the code calls
          PDDocument.load(File), which is no longer available in PDFBox 3.x.
        -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.30</version>
        </dependency>
    </dependencies>
     <build>
        <plugins>
            <!-- Builds the self-contained jar (application classes + dependencies) during "package". -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.4</version> 
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <createDependencyReducedPom>false</createDependencyReducedPom>
                            <transformers>
                                <!-- Writes Main-Class into the shaded jar's manifest. -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.rinTech.pdfPageCustomNamer.SplitPdfAndRenameF</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>        
            <!-- Manifest settings for the plain jar: main class plus a Class-Path that points at libs/. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>3.2.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <classpathPrefix>libs/</classpathPrefix>
                            <mainClass>com.rinTech.pdfPageCustomNamer.SplitPdfAndRenameF</mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
            <!--
              Wraps the packaged jar into a GUI-type Windows executable.
              NOTE(review): no <version> is declared for this plugin, so the
              build is not reproducible; pin a version after confirming which
              release the project was built with.
            -->
            <plugin>
                <groupId>com.akathist.maven.plugins.launch4j</groupId>
                <artifactId>launch4j-maven-plugin</artifactId>
                <executions>
                    <execution>
                        <goals>
                            <goal>launch4j</goal>
                        </goals>
                        <configuration>
                            <headerType>gui</headerType>
                            <jar>${project.build.directory}/PdfPageCustomNamer-1.0.jar</jar>
                            <outfile>${project.build.directory}/PdfPageCustomNamer.exe</outfile>
                            <icon>src/main/resources/scary.ico</icon>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

        </plugins>
    </build>   
</project>

最後に私からの宣伝

自分達でシステム構築できそうですか?

『専任の担当者がいないから無理かなぁ』
『担当者に構築する時間がないからなぁ』
『一人でやりきる自信がない』
『導入・構築までやってくれないかなぁ』

と色々と思う方もいたのではないでしょうか?

そんな方に朗報です。
御社が望む環境をお伝え頂き
環境構築・システム構築のお手伝いをさせて頂きます。
別途、メニューを設けておりますので、興味を持たれた方は
見積り希望と明記のうえお問い合わせ下さい。
連絡お待ちしております。ここをクリック
※無理な場合もありますので、その際にはご理解の程宜しくお願い致します。

コメント

タイトルとURLをコピーしました