javascript 使用 pdf.js 和 ImageData 将 .pdf 渲染到单个 Canvas
声明:本页面是StackOverFlow热门问题的中英对照翻译,遵循CC BY-SA 4.0协议,如果您需要使用它,必须同样遵循CC BY-SA许可,注明原文地址和作者信息,同时你必须将它归于原作者(不是我):StackOverFlow
原文地址: http://stackoverflow.com/questions/15341010/
Warning: these are provided under cc-by-sa 4.0 license. You are free to use/share it, But you must attribute it to the original authors (not me):
StackOverFlow
Render .pdf to single Canvas using pdf.js and ImageData
提问by H_end-rik
I am trying to read an entire .pdf Document using PDF.js and then render all the pages on a single canvas.
我正在尝试使用 PDF.js 读取整个 .pdf 文档,然后在单个画布上呈现所有页面。
My idea: render each page onto a canvas and get the ImageData (context.getImageData()), clear the canvas do the next page. I store all the ImageDatas in an array and once all pages are in there I want to put all the ImageDatas from the array onto a single canvas.
我的想法:将每个页面渲染到画布上并获取 ImageData (context.getImageData()),清除画布做下一页。我将所有 ImageData 存储在一个数组中,一旦所有页面都在那里,我想将数组中的所有 ImageData 放到一个画布上。
var pdf = null;
PDFJS.disableWorker = true;
var pages = new Array();
//Prepare some things
var canvas = document.getElementById('cv');
var context = canvas.getContext('2d');
var scale = 1.5;
PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
pdf = _pdf;
//Render all the pages on a single canvas
for(var i = 1; i <= pdf.numPages; i ++){
pdf.getPage(i).then(function getPage(page){
var viewport = page.getViewport(scale);
canvas.width = viewport.width;
canvas.height = viewport.height;
page.render({canvasContext: context, viewport: viewport});
pages[i-1] = context.getImageData(0, 0, canvas.width, canvas.height);
context.clearRect(0, 0, canvas.width, canvas.height);
p.Out("pre-rendered page " + i);
});
}
//Now we have all 'dem Pages in "pages" and need to render 'em out
canvas.height = 0;
var start = 0;
for(var i = 0; i < pages.length; i++){
if(canvas.width < pages[i].width) canvas.width = pages[i].width;
canvas.height = canvas.height + pages[i].height;
context.putImageData(pages[i], 0, start);
start += pages[i].height;
}
});
So from the way I understnad thing this should work, right? When I run this I end up with the canvas that is big enought to contain all the pages of the pdf but doesn't show the pdf...
所以从我理解这应该起作用的方式来看,对吧?当我运行它时,我最终得到的画布足够大,可以包含 pdf 的所有页面,但不显示 pdf ......
Thank you for helping.
谢谢你的帮忙。
采纳答案by markE
I can't speak to the part of your code that renders the pdf into a canvas, but I do see some problems.
我无法谈论将 pdf 呈现为画布的代码部分,但我确实看到了一些问题。
- Everyresetting canvas.width or canvas.height automatically clears the canvas contents. So in the top section, your clearRect is not needed because the canvas is cleared by canvas.width prior to your every page.render.
- More importantly, in the bottom section, all your previous pdf drawings are cleared by every canvas resizing (oops!).
- getImageData() gets an arraywhere each pixel is represented by 4 consecutive elements of that array (red then green then blue then alpha). Since getImageData() is an array, so it doesn't have a pages[i].width or pages[i].height—it only has a pages[i].length. That array length cannot be used to determine widths or heights.
- 每次重置 canvas.width 或 canvas.height 都会自动清除画布内容。因此,在顶部部分,不需要您的 clearRect,因为在您的每个 page.render 之前画布已被 canvas.width 清除。
- 更重要的是,在底部,每次调整画布大小都会清除您以前的所有 pdf 绘图(哎呀!)。
- getImageData() 获取一个数组,其中每个像素由该数组的 4 个连续元素表示(红色然后是绿色,然后是蓝色,然后是 alpha)。因为 getImageData() 是一个数组,所以它没有 pages[i].width 或 pages[i].height——它只有 pages[i].length。该数组长度不能用于确定宽度或高度。
So to get you started, I would start by changing your code to this (very, very untested!):
因此,为了让您入门,我首先将您的代码更改为此(非常非常未经测试!):
var pdf = null;
PDFJS.disableWorker = true;
var pages = new Array();
//Prepare some things
var canvas = document.getElementById('cv');
var context = canvas.getContext('2d');
var scale = 1.5;
var canvasWidth=0;
var canvasHeight=0;
var pageStarts=new Array();
pageStarts[0]=0;
PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
pdf = _pdf;
//Render all the pages on a single canvas
for(var i = 1; i <= pdf.numPages; i ++){
pdf.getPage(i).then(function getPage(page){
var viewport = page.getViewport(scale);
// changing canvas.width and/or canvas.height auto-clears the canvas
canvas.width = viewport.width;
canvas.height = viewport.height;
page.render({canvasContext: context, viewport: viewport});
pages[i-1] = context.getImageData(0, 0, canvas.width, canvas.height);
// calculate the width of the final display canvas
if(canvas.width>maxCanvasWidth){
maxCanvasWidth=canvas.width;
}
// calculate the accumulated with of the final display canvas
canvasHeight+=canvas.height;
// save the "Y" starting position of this pages[i]
pageStarts[i]=pageStarts[i-1]+canvas.height;
p.Out("pre-rendered page " + i);
});
}
canvas.width=canvasWidth;
canvas.height = canvasHeight; // this auto-clears all canvas contents
for(var i = 0; i < pages.length; i++){
context.putImageData(pages[i], 0, pageStarts[i]);
}
});
Alternatively, here's a more traditional way of accomplishing your task:
或者,这是完成任务的更传统的方式:
Use a single “display” canvas and allow the user to “page through” each desired page.
使用单个“显示”画布并允许用户“翻阅”每个所需的页面。
Since you already start by drawing each page into a canvas, why not keep a separate, hidden canvas for each page. Then when the user wants to see page#6, you just copy the hidden canvas#6 onto your display canvas.
既然您已经开始将每个页面绘制到画布中,为什么不为每个页面保留一个单独的隐藏画布。然后当用户想要查看第 6 页时,您只需将隐藏的画布 #6 复制到您的显示画布上。
The Mozilla devs use this approach in their pdfJS demo here: http://mozilla.github.com/pdf.js/web/viewer.html
Mozilla 开发人员在他们的 pdfJS 演示中使用这种方法:http://mozilla.github.com/pdf.js/web/viewer.html
You can check out the code for the viewer here: http://mozilla.github.com/pdf.js/web/viewer.js
您可以在此处查看查看器的代码:http: //mozilla.github.com/pdf.js/web/viewer.js
回答by
The PDF operations are asynchronous at all stages. This means you also need to catch the promise at the last render as well. If you not catch it you will only get a blank canvas as the rendering isn't finished before the loop continues to the next page.
PDF 操作在所有阶段都是异步的。这意味着您还需要在最后一次渲染时捕获承诺。如果你没有抓住它,你只会得到一个空白画布,因为在循环继续到下一页之前渲染还没有完成。
Tip: I would also recommend that you use something else than getImageData
as this will store uncompressed bitmap, for example the data-uri instead which is compressed data.
提示:我还建议您使用其他东西,getImageData
因为这将存储未压缩的位图,例如 data-uri 而不是压缩数据。
Here is a slightly different approach eliminating the for-loop and uses the promises better for this purpose:
这是一种略有不同的方法,消除了 for 循环,并为此目的更好地使用了 Promise:
var canvas = document.createElement('canvas'), // single off-screen canvas
ctx = canvas.getContext('2d'), // to render to
pages = [],
currentPage = 1,
url = 'path/to/document.pdf'; // specify a valid url
PDFJS.getDocument(url).then(iterate); // load PDF document
/* To avoid too many levels, which easily happen when using chained promises,
the function is separated and just referenced in the first promise callback
*/
function iterate(pdf) {
// init parsing of first page
if (currentPage <= pdf.numPages) getPage();
// main entry point/function for loop
function getPage() {
// when promise is returned do as usual
pdf.getPage(currentPage).then(function(page) {
var scale = 1.5;
var viewport = page.getViewport(scale);
canvas.height = viewport.height;
canvas.width = viewport.width;
var renderContext = {
canvasContext: ctx,
viewport: viewport
};
// now, tap into the returned promise from render:
page.render(renderContext).then(function() {
// store compressed image data in array
pages.push(canvas.toDataURL());
if (currentPage < pdf.numPages) {
currentPage++;
getPage(); // get next page
}
else {
done(); // call done() when all pages are parsed
}
});
});
}
}
When you then need to retrieve a page you simply create an image element and set the data-uri as source:
当您需要检索页面时,您只需创建一个图像元素并将 data-uri 设置为源:
function drawPage(index, callback) {
var img = new Image;
img.onload = function() {
/* this will draw the image loaded onto canvas at position 0,0
at the optional width and height of the canvas.
'this' is current image loaded
*/
ctx.drawImage(this, 0, 0, ctx.canvas.width, ctx.canvas.height);
callback(); // invoke callback when we're done
}
img.src = pages[index]; // start loading the data-uri as source
}
Due to the image loading it will be asynchronous in nature as well which is why we need the callback. If you don't want the asynchronous nature then you could also do this step (creating and setting the image element) in the render promise above storing image elements instead of data-uris.
由于图像加载,它本质上也是异步的,这就是我们需要回调的原因。如果您不想要异步性质,那么您也可以在上面的渲染承诺中执行此步骤(创建和设置图像元素),而不是存储图像元素而不是 data-uris。
Hope this helps!
希望这可以帮助!
回答by Cesar
You can pass the number page to the promises , get that page canvas data and render in the right order on canvas
您可以将数字页面传递给承诺,获取该页面画布数据并在画布上以正确的顺序呈现
var renderPageFactory = function (pdfDoc, num) {
return function () {
var localCanvas = document.createElement('canvas');
///return pdfDoc.getPage(num).then(renderPage);
return pdfDoc.getPage(num).then((page) => {
renderPage(page, localCanvas, num);
});
};
};
var renderPages = function (pdfDoc) {
var renderedPage = $q.resolve();
for (var num = 1; num <= pdfDoc.numPages; num++) {
// Wait for the last page t render, then render the next
renderedPage = renderedPage.then(renderPageFactory(pdfDoc, num));
}
};
renderPages(pdf);
var renderPageFactory = function (pdfDoc, num) {
return function () {
var localCanvas = document.createElement('canvas');
///return pdfDoc.getPage(num).then(renderPage);
return pdfDoc.getPage(num).then((page) => {
renderPage(page, localCanvas, num);
});
};
};
var renderPages = function (pdfDoc) {
var renderedPage = $q.resolve();
for (var num = 1; num <= pdfDoc.numPages; num++) {
// Wait for the last page t render, then render the next
renderedPage = renderedPage.then(renderPageFactory(pdfDoc, num));
}
};
renderPages(pdf);
Complete example
完整示例
function renderPDF(url, canvas) {
var pdf = null;
PDFJS.disableWorker = true;
var pages = new Array();
var context = canvas.getContext('2d');
var scale = 1;
var canvasWidth = 256;
var canvasHeight = 0;
var pageStarts = new Array();
pageStarts[0] = 0;
var k = 0;
function finishPage(localCanvas, num) {
var ctx = localCanvas.getContext('2d');
pages[num] = ctx.getImageData(0, 0, localCanvas.width, localCanvas.height);
// calculate the accumulated with of the final display canvas
canvasHeight += localCanvas.height;
// save the "Y" starting position of this pages[i]
pageStarts[num] = pageStarts[num -1] + localCanvas.height;
if (k + 1 >= pdf.numPages)
{
canvas.width = canvasWidth;
canvas.height = canvasHeight; // this auto-clears all canvas contents
for (var i = 0; i < pages.length; i++) {
context.putImageData(pages[i+1], 0, pageStarts[i]);
}
var img = canvas.toDataURL("image/png");
$scope.printPOS(img);
}
k++;
}
function renderPage(page, localCanvas, num) {
var ctx = localCanvas.getContext('2d');
var viewport = page.getViewport(scale);
// var viewport = page.getViewport(canvas.width / page.getViewport(1.0).width);
// changing canvas.width and/or canvas.height auto-clears the canvas
localCanvas.width = viewport.width;
/// viewport.width = canvas.width;
localCanvas.height = viewport.height;
var renderTask = page.render({canvasContext: ctx, viewport: viewport});
renderTask.then(() => {
finishPage(localCanvas, num);
});
}
PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
pdf = _pdf;
var renderPageFactory = function (pdfDoc, num) {
return function () {
var localCanvas = document.createElement('canvas');
///return pdfDoc.getPage(num).then(renderPage);
return pdfDoc.getPage(num).then((page) => {
renderPage(page, localCanvas, num);
});
};
};
var renderPages = function (pdfDoc) {
var renderedPage = $q.resolve();
for (var num = 1; num <= pdfDoc.numPages; num++) {
// Wait for the last page t render, then render the next
renderedPage = renderedPage.then(renderPageFactory(pdfDoc, num));
}
};
renderPages(pdf);
});
}
function renderPDF(url, canvas) {
var pdf = null;
PDFJS.disableWorker = true;
var pages = new Array();
var context = canvas.getContext('2d');
var scale = 1;
var canvasWidth = 256;
var canvasHeight = 0;
var pageStarts = new Array();
pageStarts[0] = 0;
var k = 0;
function finishPage(localCanvas, num) {
var ctx = localCanvas.getContext('2d');
pages[num] = ctx.getImageData(0, 0, localCanvas.width, localCanvas.height);
// calculate the accumulated with of the final display canvas
canvasHeight += localCanvas.height;
// save the "Y" starting position of this pages[i]
pageStarts[num] = pageStarts[num -1] + localCanvas.height;
if (k + 1 >= pdf.numPages)
{
canvas.width = canvasWidth;
canvas.height = canvasHeight; // this auto-clears all canvas contents
for (var i = 0; i < pages.length; i++) {
context.putImageData(pages[i+1], 0, pageStarts[i]);
}
var img = canvas.toDataURL("image/png");
$scope.printPOS(img);
}
k++;
}
function renderPage(page, localCanvas, num) {
var ctx = localCanvas.getContext('2d');
var viewport = page.getViewport(scale);
// var viewport = page.getViewport(canvas.width / page.getViewport(1.0).width);
// changing canvas.width and/or canvas.height auto-clears the canvas
localCanvas.width = viewport.width;
/// viewport.width = canvas.width;
localCanvas.height = viewport.height;
var renderTask = page.render({canvasContext: ctx, viewport: viewport});
renderTask.then(() => {
finishPage(localCanvas, num);
});
}
PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
pdf = _pdf;
var renderPageFactory = function (pdfDoc, num) {
return function () {
var localCanvas = document.createElement('canvas');
///return pdfDoc.getPage(num).then(renderPage);
return pdfDoc.getPage(num).then((page) => {
renderPage(page, localCanvas, num);
});
};
};
var renderPages = function (pdfDoc) {
var renderedPage = $q.resolve();
for (var num = 1; num <= pdfDoc.numPages; num++) {
// Wait for the last page t render, then render the next
renderedPage = renderedPage.then(renderPageFactory(pdfDoc, num));
}
};
renderPages(pdf);
});
}