
Java 解析 PDF 并获取其中的内容信息
【代码】Java 解析 PDF 并获取其中的内容信息。
·
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.*;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.env.Environment;
import org.springframework.security.core.annotation.AuthenticationPrincipal;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.Resource;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Map;
/**
 * Returns the thesis title found in a PDF: the text located between the
 * "题 目" (title) marker and the following "姓 名" (name) marker, with all
 * spaces and line feeds removed.
 *
 * <p>Author: dingqiming. Created: 2022/12/05 11:14.
 *
 * @param saveName name handed to {@code ftpOperation.downloadFile} to obtain the PDF stream
 * @return the text between the two markers, or an empty string when the begin
 *         marker is missing or no end marker follows it
 * @throws IOException if no stream is available for {@code saveName}, or the PDF
 *         cannot be read or parsed
 */
public String readLwtm(String saveName) throws IOException {
    final String begStr = "题 目";
    final String endStr = "姓 名";

    // Local accumulator only, so the unsynchronized StringBuilder is sufficient.
    StringBuilder pageContent = new StringBuilder();

    // try-with-resources guarantees the downloaded stream is closed even when parsing fails.
    try (InputStream inputStream = ftpOperation.downloadFile(saveName)) {
        if (inputStream == null) {
            // NOTE(review): assumes downloadFile may return null for a missing file — confirm.
            throw new FileNotFoundException("No PDF stream available for: " + saveName);
        }
        PdfReader reader = new PdfReader(inputStream);
        try {
            int pageNum = reader.getNumberOfPages();
            // iText page numbers are 1-based.
            for (int i = 1; i <= pageNum; i++) {
                pageContent.append(PdfTextExtractor.getTextFromPage(reader, i));
            }
        } finally {
            // Release the reader's resources on both the success and the failure path.
            reader.close();
        }
    }

    String nr = pageContent.toString();
    int beginIndex = nr.indexOf(begStr);
    if (beginIndex < 0) {
        return "";
    }

    // Look for the end marker only after the begin marker; an earlier occurrence
    // would otherwise yield an invalid (negative-length) substring range.
    int contentStart = beginIndex + begStr.length();
    int endIndex = nr.indexOf(endStr, contentStart);
    if (endIndex < 0) {
        return "";
    }

    // Literal replacement: same result as the regex form, without compiling patterns.
    return nr.substring(contentStart, endIndex).replace(" ", "").replace("\n", "");
}
更多推荐
所有评论(0)