「javapdf文件读取」java生成pdf

admin 2022-11-26 03:11:07 2061

今天给各位分享javapdf文件读取的知识，其中也会对java生成pdf进行解释，如果能碰巧解决你现在面临的问题，别忘了关注本站，现在开始吧！

本文目录一览：

1、怎样用JAVA编程实现读取PDF文件中的文字或英文保存到TXT文档中，不使用第三方jar包。
2、java 如何读取PDF文件内容
3、如何用java读取pdf文档的部分内容
4、用java读取pdf
5、怎么用java读取pdf文件内容
6、如何使用java读取PDF文件

怎样用JAVA编程实现读取PDF文件中的文字或英文保存到TXT文档中，不使用第三方jar包。

1、创建一个路径为要读取的txt文件的File对象rFile。 2、创建一个路径为要写入的txt文件的File对象wFile。 3、创建一个FileReader对象，传入rFile到构造器。 4、准备一个char数组，FileReader类有一个继承自java.io.Reader的read(char[] cbuf)方法，将字符读入数组。 5、创建一个FileWriter对象，传入wFile到构造器。 6、FileWriter类有一个继承自java.io.Writer的write(char[] cbuf)方法，可以写入字符数组。 7、最后别忘了关闭流。 注意：以上步骤只适用于纯文本文件之间的复制；PDF是二进制格式，其中的文字通常经过压缩和编码，直接用FileReader读取得不到可读的文本。如果不使用第三方jar包，就必须自行解析PDF格式，否则建议使用下文介绍的PDFBox。

java 如何读取PDF文件内容

import java.io.File;

import java.io.FileOutputStream;

import java.io.OutputStreamWriter;

import java.io.Writer;

import java.net.MalformedURLException;

import java.net.URL;

import org.pdfbox.pdmodel.PDDocument;

import org.pdfbox.util.PDFTextStripper;

public class PdfReader {

public void readFdf(String file) throws Exception {

// 是否排序

boolean sort = false;

// pdf文件名

String pdfFile = file;

// 输入文本文件名称

String textFile = null;

// 编码方式

String encoding = "UTF-8";

// 开始提取页数

int startPage = 1;

// 结束提取页数

int endPage = Integer.MAX_VALUE;

// 文件输入流，生成文本文件

Writer output = null;

// 内存中存储的PDF Document

PDDocument document = null;

try {

// 首先当作一个URL来装载文件，如果得到异常再从本地文件系统//去装载文件

URL url = new URL(pdfFile);

//注意参数已不是以前版本中的URL.而是File。

document = PDDocument.load(pdfFile);

// 获取PDF的文件名

String fileName = url.getFile();

// 以原来PDF的名称来命名新产生的txt文件

if (fileName.length() 4) {

File outputFile = new File(fileName.substring(0, fileName

.length() - 4)

+ ".txt");

textFile = outputFile.getName();

}

} catch (MalformedURLException e) {

// 如果作为URL装载得到异常则从文件系统装载

//注意参数已不是以前版本中的URL.而是File。

document = PDDocument.load(pdfFile);

if (pdfFile.length() 4) {

textFile = pdfFile.substring(0, pdfFile.length() - 4)

+ ".txt";

}

// 文件输入流，写入文件倒textFile

output = new OutputStreamWriter(new FileOutputStream(textFile),

encoding);

// PDFTextStripper来提取文本

PDFTextStripper stripper = null;

stripper = new PDFTextStripper();

// 设置是否排序

stripper.setSortByPosition(sort);

// 设置起始页

stripper.setStartPage(startPage);

// 设置结束页

stripper.setEndPage(endPage);

// 调用PDFTextStripper的writeText提取并输出文本

stripper.writeText(document, output);

} finally {

if (output != null) {

// 关闭输出流

output.close();

}

if (document != null) {

// 关闭PDF Document

document.close();

}

/**

* @param args

public static void main(String[] args) {

// TODO Auto-generated method stub

PdfReader pdfReader = new PdfReader();

try {

// 取得E盘下的SpringGuide.pdf的内容

pdfReader.readFdf("E://SpringGuide.pdf");

} catch (Exception e) {

e.printStackTrace();

}

如何用java读取pdf文档的部分内容

你需要用到PDFbox api

例子如下

import java.io.File;

import java.io.IOException;

import org.apache.pdfbox.pdmodel.PDDocument;

import org.apache.pdfbox.text.PDFTextStripper;

import org.apache.pdfbox.text.PDFTextStripperByArea;

// Load test.pdf and print all of its text. try-with-resources closes the
// document on every path; the original snippet never closed it.
try (PDDocument document = PDDocument.load(new File("test.pdf"))) {

    // Text is only extracted from documents that are not encrypted.
    if (!document.isEncrypted()) {

        PDFTextStripper textStripper = new PDFTextStripper();

        String st = textStripper.getText(document);

        System.out.println("Text:" + st);

    }

} catch (Exception e) {

    e.printStackTrace();

}

用java读取pdf

可以使用PDFBOX0.7.3控件：

import java.io.InputStream;import java.io.IOException;

import org.apache.lucene.document.Document;import org.pdfbox.cos.COSDocument;

import org.pdfbox.pdfparser.PDFParser;import org.pdfbox.pdmodel.PDDocument;

import org.pdfbox.pdmodel.PDDocumentInformation;import org.pdfbox.util.PDFTextStripper;

import com.search.code.Index;

public Document getDocument(Index index, String url, String title, InputStream is)throws DocCenterException {COSDocument cosDoc = null;br/ try {cosDoc = parseDocument(is);br/ } catch (IOException e) {

closeCOSDocument(cosDoc);

throw new DocCenterException("无法处理该PDF文档", e);

}

if (cosDoc.isEncrypted()) {

if (cosDoc != null)

closeCOSDocument(cosDoc);

throw new DocCenterException("该PDF文档是加密文档，无法处理");

}

String docText = null;

try {

PDFTextStripper stripper = new PDFTextStripper();

docText = stripper.getText(new PDDocument(cosDoc));

} catch (IOException e) {

closeCOSDocument(cosDoc);

throw new DocCenterException("无法处理该PDF文档", e);

}

PDDocument pdDoc = null;

try {pdDoc = new PDDocument(cosDoc);br/ PDDocumentInformation docInfo = pdDoc.getDocumentInformation();br/ if(docInfo.getTitle()!=null !docInfo.getTitle().equals("")){br/ title = docInfo.getTitle();}

} catch (Exception e) {

closeCOSDocument(cosDoc);

closePDDocument(pdDoc);

System.err.println("无法取得该PDF文档的元数据" + e.getMessage());

} finally {

closeCOSDocument(cosDoc);

closePDDocument(pdDoc);

}

return null;

}

/**
 * Runs a PDFParser over the given stream and returns the document it produced.
 *
 * @param is stream containing the PDF
 * @return the parsed COSDocument
 * @throws IOException if constructing the parser or parsing fails
 */
private static COSDocument parseDocument(InputStream is) throws IOException {
    PDFParser pdfParser = new PDFParser(is);
    pdfParser.parse();
    COSDocument parsed = pdfParser.getDocument();
    return parsed;
}

/**
 * Closes the given COSDocument, doing nothing when it is {@code null}.
 * An IOException raised by close() is deliberately ignored.
 *
 * @param cosDoc the document to close; may be {@code null}
 */
private void closeCOSDocument(COSDocument cosDoc) {
    if (cosDoc == null) {
        return;
    }
    try {
        cosDoc.close();
    } catch (IOException ignored) {
        // Best-effort close: a failure here is intentionally not reported.
    }
}

/**
 * Closes the given PDDocument, doing nothing when it is {@code null}.
 * An IOException raised by close() is deliberately ignored.
 *
 * @param pdDoc the document to close; may be {@code null}
 */
private void closePDDocument(PDDocument pdDoc) {
    if (pdDoc == null) {
        return;
    }
    try {
        pdDoc.close();
    } catch (IOException ignored) {
        // Best-effort close: a failure here is intentionally not reported.
    }
}

怎么用java读取pdf文件内容

你可以把pdf转成word再进行读取

推荐使用转转大师pdf转word转换器，免费的在线工具

百度搜索下，在线免费转换就行了，不用下载注册，很方便

如何使用java读取PDF文件

读取PDF中的文本：Java读取PDF文本

读取PDF中的图片：Java读取PDF图片

javapdf文件读取的介绍就聊到这里吧，感谢你花时间阅读本站内容，更多关于java生成pdf、javapdf文件读取的信息别忘了在本站进行查找喔。

标签：javapdf文件读取