Folks- I'm relatively new to ASP.NET, and have a question that has stumped my peers-- folks much more experienced than myself.
My company created a website that uses iTextSharp to build and stream PDFs. The functionality works perfectly on my company's development and staging/test servers. The customer's functionality isn't working well, however. The customer's server streams a file where the PDF is missing the last block of data representing the EOF section. The PDF seems to build correctly, streams correctly, but when users open the PDF, the following error displays: 'There was an error opening this document. The file is damaged and could not be repaired.'
By comparing the PDFs in a text viewer (comparing the PDFs from my server vice the customer's server), I can see that the EOF section is missing from the customer's PDF. I'll also note that no errors are thrown during PDF creation, if that's helpful. To make matters more difficult, I have no access to the customer's servers, so I won't be able to interact with the systems directly.
The asp.net version is 3.5. Both of our servers (my company and the customer) are: running IIS7.5 on Server 2008R2; using iTextSharp is 5.1.2; and are configured for FIPS compatibility.
I've read dozens and dozens of posts detailing why a PDF isn't created properly, why it may not be streaming, and all things related, but I haven't seen this particular issue before. I guess what I need to know in the short-term is: 1) what can I provide to help diagnose the issue, 2) where is a good place to start looking for areas of concern?
Also, I updated to revision 5.5.3 last night; same results-- it works fine on my servers, but produces broken PDFs on the customer's server.
Code added:
Public Function BuildReport(ByVal tblReport As DataTable, _
ByRef memStream As MemoryStream, _
ByRef strErrMsg As String) As Boolean
Dim booOK As Boolean = True
strErrMsg = String.Empty
' Create document
Try
' Create writer (listens to the document and directs PDF stream)
memStream = New MemoryStream()
Dim msWriter As PdfWriter = PdfWriter.GetInstance(_document, memStream)
msWriter.CloseStream = False
'Create header
Dim ev As New itsEvents
msWriter.PageEvent = ev
' Set document metadata
_document.AddTitle(_strMetaTitle)
_document.AddSubject(_strMetaSubject)
_document.AddCreator(_strMetaApplication)
_document.AddAuthor(_strMetaAuthor)
' Open document, add document content, close document
_document.Open()
AddReportContent(tblReport)
_document.Close()
Catch ex As Exception
booOK = False
strErrMsg = ex.Message
End Try
Return booOK
End Function
Private Sub AddReportContent(ByVal tblReport As DataTable)
' Count report columns
Dim intReportColumns As Integer = 0
For Each col As DataColumn In tblReport.Columns
If ContainedInColumnMask(col.ColumnName) Then
intReportColumns += 1
End If
Next
' Build table
Dim table As PdfPTable
Dim cell As PdfPCell
Dim phrase As Phrase
If intReportColumns >= 1 Then
' Init table
table = New PdfPTable(intReportColumns)
' Add title to table
'phrase = New Phrase(_strMetaTitle, _fontLarge)
'cell = New PdfPCell(phrase)
'cell.Colspan = intReportColumns
'cell.HorizontalAlignment = 1 ' 0=Left, 1=Centre, 2=Right
'table.AddCell(cell)
' Add column headers to table
Dim i As Integer = 0
Dim intColWidth As Integer
Dim intColWidths As Integer() = New Integer(intReportColumns - 1) {}
Dim intColWidthTotal As Integer = 0
Dim strColName As String
For Each col As DataColumn In tblReport.Columns
If ContainedInColumnMask(col.ColumnName) Then
strColName = col.ColumnName
If (col.ExtendedProperties.Item("NOTEXTEXPORT") <> True) Then
If col.ExtendedProperties.Contains("FRIENDLYNAME") Then
strColName = col.ExtendedProperties.Item("FRIENDLYNAME")
End If
End If
phrase = New Phrase(strColName, _fontMedium)
cell = New PdfPCell(phrase)
cell.BorderWidth = 1
cell.BackgroundColor = iTextSharp.text.BaseColor.LIGHT_GRAY
'cell.BackgroundColor = iTextSharp.text.Color.LIGHT_GRAY
table.AddCell(cell)
intColWidth = GetColumnWidth(col, strColName, _fontMedium.Size, _fontSmall.Size)
intColWidths(i) = intColWidth
intColWidthTotal += intColWidth
i += 1
End If
Next
table.TotalWidth = intColWidthTotal
table.SetWidths(intColWidths)
' Add rows to table
For Each row As DataRow In tblReport.Rows
For Each col As DataColumn In tblReport.Columns
If ContainedInColumnMask(col.ColumnName) Then
phrase = New Phrase(SetBlankIfNothing(row.Item(col.ColumnName).ToString()), _fontSmall)
cell = New PdfPCell(phrase)
cell.BorderWidth = 0.5
table.AddCell(cell)
End If
Next
Next
Else
' Init table
table = New PdfPTable(1)
' Nothing to add to table
table.AddCell(String.Empty)
End If
' Add table to document
_document.Add(table)
End Sub
Public Sub New(ByVal strMetaTitle As String, _
ByVal strMetaSubject As String, _
ByVal strMetaApplication As String, _
ByVal strMetaAuthor As String, _
Optional ByVal strColumnMask As String = "")
GetStaticInfo()
_strMetaTitle = strMetaTitle
_strMetaSubject = strMetaSubject
_strMetaApplication = strMetaApplication
_strMetaAuthor = strMetaAuthor
_document = New iTextSharp.text.Document(_itsPage, _itsMarginLeft, _itsMarginRight, _itsMarginTop, _itsMarginBottom)
If strColumnMask <> "" And Not strColumnMask Is Nothing Then
_strColumnMask = strColumnMask
End If
End Sub
Public Sub New(ByVal strMetaTitle As String, _
ByVal strMetaSubject As String, _
ByVal strMetaApplication As String, _
ByVal strMetaAuthor As String, _
Optional ByVal strColumnMask As String = "")
GetStaticInfo()
_strMetaTitle = strMetaTitle
_strMetaSubject = strMetaSubject
_strMetaApplication = strMetaApplication
_strMetaAuthor = strMetaAuthor
_document = New iTextSharp.text.Document(_itsPage, _itsMarginLeft, _itsMarginRight, _itsMarginTop, _itsMarginBottom)
If strColumnMask <> "" And Not strColumnMask Is Nothing Then
_strColumnMask = strColumnMask
End If
End Sub
Related
I am looking to use PDFSharp to convert a HTML page into a PDF. This then is attached into an email and sent all in one go.
So, I have a aspx page and vb code file in which gets called through a database SQL job.
Protected Sub Page_Load(sender As Object, e As EventArgs) Handles Me.Load
Dim ReqUrl As String, WorkflowID As String = String.Empty
Using con As New SqlConnection(GlobalVariables.ConStr)
Using com As New SqlCommand("EXEC App.GetWorkflowToSend", con)
con.Open()
Using dr = com.ExecuteReader
Try
While dr.Read
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12
ReqUrl = HttpContext.Current.Request.Url.GetLeftPart(UriPartial.Authority) + dr.Item("WorkflowLink")
WorkflowID = dr.Item("WorkflowID")
Dim r As String = HttpContext.Current.Request.Url.GetLeftPart(UriPartial.Authority) + dr.Item("WorkflowLink")
Dim p As String = Server.MapPath("~\Data\Files") + "\" + WorkflowID + ".pdf"
Dim t As Thread = New Thread(CType(
Function()
ConvertHTML(r, p)
SendMail(Nothing, EmailFrom, "email#address", "New PDF Generated " + WorkflowID, r + "<br/>" + p + "<br/>" + WorkflowID, EmailUser, EmailPass, EmailHost, EmailPort, EmailSSL, "", Nothing, p)
End Function, ThreadStart))
t.SetApartmentState(ApartmentState.STA)
t.Start()
Response.Write(r + "<br>")
Response.Write(p)
End While
Catch
SendMail(Nothing, EmailFrom, "email#address", "Error: " + Err.Description, WorkflowID, EmailUser, EmailPass, EmailHost, EmailPort, EmailSSL, "", Nothing)
End Try
End Using
End Using
End Using
End Sub
In the vb code I essentially call a database stored procedure. This returns some records.
For each of these records, I am currently using HttpContext.Current.Request.Url to make up a string which is essentially the the document url.
I also then declare and specify the location as a String of where I want the converted PDF to be stored.
Public Shared Function ConvertHTML(HTMLPage As String, FileName As String) As String
Dim pngfilename As String = Path.GetTempFileName()
Dim res As String = "" ' = ok
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12
'Try
Using wb As System.Windows.Forms.WebBrowser = New System.Windows.Forms.WebBrowser
wb.ScrollBarsEnabled = False
wb.ScriptErrorsSuppressed = True
wb.Navigate(HTMLPage)
While Not (wb.ReadyState = WebBrowserReadyState.Complete)
Application.DoEvents()
End While
wb.Width = wb.Document.Body.ScrollRectangle.Width
wb.Height = wb.Document.Body.ScrollRectangle.Height
If wb.Height > 3000 Then
wb.Height = 3000
End If
' Get a Bitmap representation of the webpage as it's rendered in the WebBrowser control
Dim b As Bitmap = New System.Drawing.Bitmap(wb.Width, wb.Height)
Dim hr As Integer = b.HorizontalResolution
Dim vr As Integer = b.VerticalResolution
wb.DrawToBitmap(b, New Rectangle(0, 0, wb.Width, wb.Height))
wb.Dispose()
If File.Exists(pngfilename) Then
File.Delete(pngfilename)
End If
b.Save(pngfilename, Imaging.ImageFormat.Png)
b.Dispose()
Using doc As PdfSharp.Pdf.PdfDocument = New PdfSharp.Pdf.PdfDocument
Dim page As PdfSharp.Pdf.PdfPage = New PdfSharp.Pdf.PdfPage()
page.Width = PdfSharp.Drawing.XUnit.FromInch(wb.Width / hr)
page.Height = PdfSharp.Drawing.XUnit.FromInch(wb.Height / vr)
doc.Pages.Add(page)
Dim xgr As PdfSharp.Drawing.XGraphics = PdfSharp.Drawing.XGraphics.FromPdfPage(page)
Dim img As PdfSharp.Drawing.XImage = PdfSharp.Drawing.XImage.FromFile(pngfilename)
xgr.DrawImage(img, 0, 0)
doc.Save(FileName)
doc.Close()
img.Dispose()
xgr.Dispose()
End Using
End Using
Return res
End Function
I run the conversion function with these two strings and finally call a mailing function.
PDF Error
The problem I am having at the moment is the attached PDF I receive in the email doesn't contain the correct output and states 'Navigation to the webpage was cancelled'.
http://127.0.0.1/PDF/TTR/4
C:\inetpub\wwwroot\Prod\Data\Files\4.pdf
I also sent the two strings as output within the email and they look ok.
I'm sure there is something small missing whether that be in my conversion function or just in the main code file.
I have a file folder that contains pdf files. The sql database is not directly related to the files. For example, I have "Invoice_co_355_24636.pdf" and "Invoice_co_355_25127.pdf" in the Invoices folder. I query the database and find that Invoice "24636" is not paid and "25127" is marked paid in full, so I would like to delete "Invoice_co_355_25127.pdf" at that point since it is paid.
So what I'm doing is getting all the file names from the folder, parsing each file to get just the last numbers, (which correlate to the database). If the database shows that one or more of the Invoices has been paid, I would like to delete the file.
I have successfully, (below), been able to parse the server file names as
"InvNo" as the parsed file name, (which corelates to the database), and
"InvNoFull", which is the full database file name that needs to be deleted if it is marked as paid in the database.
But after getting the file names and the parsed file names, I do not know how to actually compare it to the database and then delete. Any help is appreciated.
Dim files() As String = Directory.GetFiles(Server.MapPath("/Contents/Invoices/" + Variable.ToString() + "/Co/" + ddlCo.SelectedValue.ToString() + "/"))
For Each file As String In files
Dim InvNo As String = Path.GetFileNameWithoutExtension(file)
Dim InvNoFull As String = Path.GetFileName(file)
InvNo = InvNo.Substring(InvNo.LastIndexOf("_") + 1, InvNo.Length - InvNo.LastIndexOf("_") - 1)
Dim CnStr As String = (ConfigurationManager.ConnectionStrings("ClientConnectionString").ConnectionString)
Dim adp As SqlDataAdapter = New SqlDataAdapter("select OrderBilling.OrderId from orderBilling Left Outer Join Orders on OrderBilling.OrderId = Orders.OrderId Where Orders.CompanyId = " & ddlCo.SelectedValue.ToString() & " And Orders.OwnerId = " & Variable.ToString() & " And OrderBilling.PaidInFull = 'False'", CnStr)
Dim ds As DataSet = New DataSet()
adp.Fill(ds, "outBill")
For Each Row As DataRow In ds.Tables(0).Rows
For Each Coll As DataColumn In ds.Tables(0).Columns
Dim s As String = Row(Coll.ColumnName).ToString()
If s <> InvNo Then
Dim FileToDelete() As String
FileToDelete = Directory.GetFiles(Server.MapPath("/Contents/Invoices/" + Variable.ToString() + "/Co/" + ddlCo.SelectedValue.ToString() + "/" + InvNoFull))
If System.IO.File.Exists(FileToDelete.ToString()) = True Then
System.IO.File.Delete(FileToDelete.ToString())
'MsgBox("File Deleted")
End If
End If
Next
Next
Next
With help from Mary but this deletes all the files in the folder, but I want it to delete only the files not returned in the database query:
Private Sub DeletePaidInvoices(OwnerID2 As String, CompanyID As String)
Dim InvoicePath = "/Contents/Invoices/" & OwnerID2 & "/Co/" & CompanyID & "/"
Dim files() As String = Directory.GetFiles(Server.MapPath(InvoicePath))
Dim lst = GetInvoiceInfo(CInt(CompanyID), CInt(OwnerID2))
For Each file As String In files
Dim InvNo As String = Path.GetFileNameWithoutExtension(file)
Dim InvNoFull As String = Path.GetFileName(file)
InvNo = InvNo.Substring(InvNo.LastIndexOf("_") + 1, InvNo.Length - InvNo.LastIndexOf("_") - 1)
'Debug.Print(InvNo) 'To check your substring, will not be in release version
For Each i As Integer In lst
Dim s As String = i.ToString()
If s <> InvNo Then
Dim FileToDelete As String
FileToDelete = "Invoice_co_" & CompanyID & "_" & InvNo & ".pdf"
If System.IO.File.Exists(Server.MapPath(InvoicePath & FileToDelete.ToString())) = True Then
System.IO.File.Delete(Server.MapPath(InvoicePath & FileToDelete.ToString()))
End If
End If
Next
Next
End Sub
Call your delete method from, say, a button passing the variable from the user interface.
Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
Dim CompID = ddlCo.SelectedValue.ToString()
Dim OwnerID = "comes from somewhere"
DeletePaidInvoices(OwnerID, CompID)
End Sub
The delete method gets the file list. I have no idea about Server.MapPath but I assume you do. Next we get a List(Of Integer) representing the results of you database query (the OrderID's). Next we enter the For loop. I added a Debug line to see what is being returned by SubString code. As you can see it is simpler code to loop through a list.
Private Sub DeletePaidInvoices(OwnerID As String, CompanyID As String)
Dim InvoicePath = $"/Contents/Invoices/{OwnerID}/Co/{CompanyID}/"
Dim files() As String = Directory.GetFiles(Server.MapPath(InvoicePath))
Dim lst = GetInvoiceInfo(CInt(CompanyID), CInt(OwnerID))
For Each file As String In files
Dim InvNo As String = Path.GetFileNameWithoutExtension(file)
Dim InvNoFull As String = Path.GetFileName(file)
InvNo = InvNo.Substring(InvNo.LastIndexOf("_") + 1, InvNo.Length - InvNo.LastIndexOf("_") - 1)
Debug.Print(InvNo) 'To check your substring, will not be in release version
For Each i As Integer In lst
Dim s As String = i.ToString()
If s <> InvNo Then
Dim FileToDelete() As String
FileToDelete = Directory.GetFiles(Server.MapPath(InvoicePath & InvNoFull))
If System.IO.File.Exists(FileToDelete.ToString()) = True Then
System.IO.File.Delete(FileToDelete.ToString())
'MsgBox("File Deleted")
End If
End If
Next
Next
End Sub
The data retrieval is in a separate function. Use Using blocks to make sure your connections and commands are closed and disposed. Always use parameters with Sql Server. There is a bit of Linq magic at the end to create the list from the DataTable.
Private Function GetInvoiceInfo(CompanyID As Integer, OwnerID As Integer) As List(Of Integer)
Dim dt As New DataTable
Using cn As New SqlConnection(ConfigurationManager.ConnectionStrings("ClientConnectionString").ConnectionString),
cmd As New SqlCommand("select OrderBilling.OrderId
from orderBilling
Left Outer Join Orders on OrderBilling.OrderId = Orders.OrderId
Where Orders.CompanyId = #CompanyID
And Orders.OwnerId = #OwnerID
And OrderBilling.PaidInFull = #Paid;", cn)
cmd.Parameters.Add("#CompanyID", SqlDbType.Int).Value = CompanyID
cmd.Parameters.Add("#OwnerID", SqlDbType.Int).Value = OwnerID
cmd.Parameters.Add("#Paid", SqlDbType.Bit).Value = False
cn.Open()
dt.Load(cmd.ExecuteReader)
End Using
Dim lstOrderID As List(Of Integer) = (From dRow In dt.AsEnumerable() Select dRow.Field(Of Integer)(0)).ToList
Return lstOrderID
End Function
The actual deleting of the files (or moving to a Paid folder) is up to you.
I am using the OpenXML library to auto generate Word files. I have a function that takes a group of files and merges them into one document. As I merge a new file into a document, I want each file to start on a new page. But, I don't want to have any blank pages. The code I have mostly works, but an issue comes up is if a file being merged in is a filled page, then a page break is still added, resulting in an empty page being added. I am not sure how to best deal with this, to prevent blank pages from being added. Here is my code:
Public Sub MergeFiles(ByVal filePaths As List(Of String), ByVal fileName As String)
Dim newFile As String = HttpRuntime.AppDomainAppPath & "PDF_Templates\TempFolder\catalog-" & Guid.NewGuid.ToString & ".docx"
File.Copy(fileName, newFile)
Dim counter As Integer = 0
For Each filePath As String In filePaths
Dim wordDoc As WordprocessingDocument = WordprocessingDocument.Open(newFile, True)
Using wordDoc
Dim mainPart As MainDocumentPart = wordDoc.MainDocumentPart
Dim altChunkId As String = "altChunkId" & counter
Dim chunk As AlternativeFormatImportPart = mainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.WordprocessingML, altChunkId)
Dim fileStream As FileStream = File.Open(filePath, FileMode.Open)
Using fileStream
chunk.FeedData(fileStream)
End Using
Dim AltChunk As AltChunk = New AltChunk()
AltChunk.Id = altChunkId
' Dont add a page break to the first page.
If counter > 0 Then
Dim last As OpenXmlElement = wordDoc.MainDocumentPart.Document.Body.Elements().LastOrDefault(Function(e) TypeOf e Is Paragraph OrElse TypeOf e Is AltChunk)
last.InsertAfterSelf(New Paragraph(New Run(New Break() With {
.Type = BreakValues.Page
})))
End If
mainPart.Document.Body.InsertAfter(Of AltChunk)(AltChunk, mainPart.Document.Body.Elements(Of Paragraph).Last())
mainPart.Document.Save()
wordDoc.Close()
End Using
counter = counter + 1
Next
End Sub
I'm trying to set up my program to connect to my FTP and download files directly from my server. This what I have so far. I don't know what I'm doing wrong, or where I'm going wrong, because no matter how I code it either says "End Expected" or "Method can't handle etc due to signatures not being compatible"
I don't know what I'm doing wrong, any help would be greatly appreciated.
Private Sub Button16_Click(sender As Object, e As EventArgs) Handles Button16.Click
(ByVal downloadpath As String, ByVal ftpuri As String, ByVal ftpusername As String, ByVal ftppassword As String)
'Create a WebClient.
Dim request As New WebClient()
' Confirm the Network credentials based on the user name and password passed in.
request.Credentials = New Net.NetworkCredential("Username", "Password")
'Read the file data into a Byte array
Dim bytes() As Byte = request.DownloadData("ftp://ftp.yourwebsitename/file.extension")
Try
' Create a FileStream to read the file into
Dim DownloadStream As FileStream = IO.File.Create("C:\Local\Test.zip")
' Stream this data into the file
DownloadStream.Write(bytes, 0, bytes.Length)
' Close the FileStream
DownloadStream.Close()
Catch ex As Exception
MessageBox.Show(ex.Message)
End Try
MessageBox.Show("Process Complete")
End Sub
You probably pasted an existing method inside a Button.Click handler by mistake.
Rebuilding what was probably the original method is almost enough.
Note that this FTP procedure is a quite basic. You can rely on it only when downloading from a known remote resource. Also, as it it, it doesn't allow to show the download progress or even to cancel it.
Maybe take a look at the WebClient.DownloadDataAsync method, which allows to easily implement a progress bar and cancel the download procedure, when needed.
Also, if you're interested, in this SO question, you can find some notes and a sample Form, which can be included in a Project, to test some features of the FtpWebRequest.
Private Sub Button16_Click(sender As Object, e As EventArgs) Handles Button16.Click
Button16.Enabled = False
DownloadFile("C:\Local\Test.zip", "ftp://ftp.example.com/file.ext", "[username]", "[password]")
Button16.Enabled = True
End Sub
Private Sub DownloadFile(destinationPath As String, ftpResource As String, ftpUsername As String, ftpPassword As String)
Dim client As New WebClient()
client.Credentials = New NetworkCredential(ftpUsername, ftpPassword)
Try
Dim dataBytes() As Byte = client.DownloadData(ftpResource)
If dataBytes.Length > 0 Then
File.WriteAllBytes(destinationPath, dataBytes)
MessageBox.Show("Download Complete")
Else
MessageBox.Show("Download failed")
End If
Catch ex As WebException
MessageBox.Show(ex.Message)
Catch ex As IoException
MessageBox.Show(ex.Message)
End Try
End Sub
Here is a Console solution. Compile this into a exe file, and run it by double-clicking the executable or get a scheduler (i.e., Windos Task Scheduler) to open and run the file (it runs as soon as it opens).
Imports System
Imports System.Collections.Generic
Imports System.ComponentModel
Imports System.Data
Imports System.Linq
Imports System.Text
Imports System.Net
Imports System.IO
Namespace ConsoleApplication1
Class Program
Private Shared Sub Main(ParamArray ByVal args() As String)
If args.Any Then
' Do code that references args
Dim dt As DateTime = DateTime.Today.AddDays(-1)
Dim date As String = String.Format("{0:yyyyMMdd}", dt)
Dim p As Program = New Program
p.getFTPFile(("raw_CA_" _
+ (date + ".txt")))
' match a certain pattern in the name of the file
p.getFTPFile(("raw_EM_" _
+ (date + ".txt")))
' match a certain pattern in the name of the file
p.getFTPFile(("raw_GLB_" _
+ (date + ".txt")))
' match a certain pattern in the name of the file
p.getFTPFile(("raw_US_" _
+ (date + ".txt")))
' match a certain pattern in the name of the file
Else
' Do code that depends on no input arguments.
Dim dt As DateTime = DateTime.Today.AddDays(-1)
Dim date As String = String.Format("{0:yyyyMMdd}", dt)
Dim p As Program = New Program
p.getFTPFile(("raw_CA_" _
+ (date + ".txt")))
' match a certain pattern in the name of the file
p.getFTPFile(("raw_EM_" _
+ (date + ".txt")))
' match a certain pattern in the name of the file
p.getFTPFile(("raw_GLB_" _
+ (date + ".txt")))
' match a certain pattern in the name of the file
p.getFTPFile(("raw_US_" _
+ (date + ".txt")))
' match a certain pattern in the name of the file
End If
End Sub
Private Sub getFTPFile(ByVal FTPFile As String)
FTPSettings.IP = "000.000.100.000"
FTPSettings.UserID = "your_id"
FTPSettings.Password = "your_password"
Dim reqFTP As FtpWebRequest = Nothing
Dim ftpStream As Stream = Nothing
Try
Dim outputStream As FileStream = New FileStream(("C:\Downloads\AFL_Files\" + FTPFile), FileMode.Create)
reqFTP = CType(FtpWebRequest.Create(("ftp://something#ftp.corp.com/your_path/" + FTPFile)),FtpWebRequest)
reqFTP.Method = WebRequestMethods.Ftp.DownloadFile
reqFTP.UseBinary = true
reqFTP.Credentials = New NetworkCredential(FTPSettings.UserID, FTPSettings.Password)
Dim response As FtpWebResponse = CType(reqFTP.GetResponse,FtpWebResponse)
ftpStream = response.GetResponseStream
Dim cl As Long = response.ContentLength
Dim bufferSize As Integer = 2048
Dim readCount As Integer
Dim buffer() As Byte = New Byte((bufferSize) - 1) {}
readCount = ftpStream.Read(buffer, 0, bufferSize)
While (readCount > 0)
outputStream.Write(buffer, 0, readCount)
readCount = ftpStream.Read(buffer, 0, bufferSize)
End While
ftpStream.Close
outputStream.Close
response.Close
Catch ex As Exception
If (Not (ftpStream) Is Nothing) Then
ftpStream.Close
ftpStream.Dispose
End If
Throw New Exception(ex.Message.ToString)
End Try
End Sub
Public Class FTPSettings
Public Shared Property IP As String
Get
End Get
Set
End Set
End Property
Public Shared Property UserID As String
Get
End Get
Set
End Set
End Property
Public Shared Property Password As String
Get
End Get
Set
End Set
End Property
End Class
End Class
End Namespace
I've been tasked with a project that needs to read the content of a PDF document with populated form fields and save the content to a database. Once the data extracted I should be able to re-create the document using a master copy of the template and re-populated form field data.
Our user base will complete the form/template on a mobile device (specifically Android devices for our environment). They will also be using the Adobe Acrobat Reader mobile app to complete the documents. Once completed, each engineer working on the document will sign the document using the signature function in the mobile app (multiple signatures could be present on multiple pages) and submit the form (which currently emails a copy of the completed PDF to a specific email address).
Once I have a copy of the completed PDF document I am able to read the AcroForm fields using from a .NET application using the iTextSharp library and do whatever I need to do with it (store field "name" and "value" in a database)
Dim reader As PdfReader = New PdfReader(pdfBytes)
Dim pdfFormFields As AcroFields = reader.AcroFields
For Each formField In reader.AcroFields.Fields.Keys
Dim ff As New FormField
ff.Name = formField
ff.Value = pdfFormFields.GetField(formField)
Do_stuff_with(ff)
Next
I'm then able to re-populate the blank pdf template at a later stage with the form data, however am struggling to read the "signature" aspect of it embedded in the file.
When completed via the Acrobat Reader Android app I believe the "signature" isn't technically a signature in the proper "Digital Signature" sense that it can be read using the iTextSharp AcroFields.GetSignatureNames() or AcroFields.GetSignatureDictionary methods. Instead I beleive the signature is stored as a Stream object within an Annotation in the document however I'm currently unable to read and convert this into a Byte Array to store in a database.
I know I will also need to get the page/position of each signature so that I can re-populate at a later stage.
I've tried several methods including extracting all Images from the document but this only extracts embedded images that existed in the template and not the annotation signatures. With a blank dummy document containing just a submit button and signature I get an error after getting the dictionary object PdfName.RESOURCES as if nothing exists.
Dim pdf as New PdfReader(bytes)
For i As Int16 = 1 To pdf.NumberOfPages
Dim pg As PdfDictionary = pdf.GetPageN(i)
Dim res As PdfDictionary = CType(PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)), PdfDictionary)
' ###
' ###
' ### Errors on next line with
' ### "Object reference not set to an instance of an object"
' ###
' ###
Dim xobj As PdfDictionary = CType(PdfReader.GetPdfObject(pg.Get(PdfName.XOBJECT)), PdfDictionary)
If xobj IsNot Nothing Then
For Each name As PdfName In xobj.Keys
Dim obj As PdfObject = xobj.Get(name)
If obj.IsIndirect Then
Dim tg As PdfDictionary = CType(PdfReader.GetPdfObject(obj), PdfDictionary)
Dim type As PdfName = CType(PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE)), PdfName)
If PdfName.IMAGE.Equals(type) Then
Dim xrefIdx As Integer = CType(obj, PRIndirectReference).Number
Dim pdfObj As PdfObject = pdf.GetPdfObject(xrefIdx)
Dim str As PdfStream = CType(pdfObj, PdfStream)
Dim bytes As Byte() = PdfReader.GetStreamBytesRaw(CType(str, PRStream))
Dim img As iTextSharp.text.Image = iTextSharp.text.Image.GetInstance(CType(obj, PRIndirectReference))
Dim filter As String = tg.Get(PdfName.FILTER).ToString
If filter = "/DCTDecode" Then
Dim img2 As System.Drawing.Image = System.Drawing.Image.FromStream(New MemoryStream(bytes))
Dim stream As MemoryStream = New MemoryStream
img2.Save(stream, System.Drawing.Imaging.ImageFormat.Jpeg)
stream.Position = 0
PdfReader.KillIndirect(obj)
img = iTextSharp.text.Image.GetInstance(stream)
writer.AddDirectImageSimple(img, CType(obj, PRIndirectReference))
End If
End If
End If
Next
End If
Next
If I inspect the document with iText RUPS I can see two Stream objects (Inspect1.png) which I assume are the two signatures in my test document TestDoc_Complete.pdf but I'm unable to extract these either into a Byte Array or Memory Stream so that I can manipulate and save.
Any help (VB/C#) would be appreciated to help me solve this.
Thanks
--EDIT--
I'm now able to loop through the PdfObjects using XrefSize and identify which objects are Streams. I can read the Stream` Bytes and Raw Bytes and output these to a file but these are unreadable by any image viewer.
Dim pdf = New PdfReader(bytes)
Dim obj As PdfObject
For i As Integer = 1 To pdf.XrefSize
Try
obj = pdf.GetPdfObject(i)
If obj IsNot Nothing And obj.IsStream Then
Dim stream As PRStream = CType(obj, PRStream)
Dim type As PdfName = Nothing
Try
type = CType(pdfreader.GetPdfObject(stream.Get(PdfName.FILTER)), PdfName)
Catch ex As Exception
End Try
If type IsNot Nothing And PdfName.FLATEDECODE.Equals(type) Then
Dim b1 As Byte() = pdfreader.GetStreamBytes(stream)
Dim b2 As Byte() = pdfreader.GetStreamBytesRaw(stream)
csLog.AddLog("Stream Length: " & stream.Length, csLogging.DebugLevel.Debug)
csLog.AddLog("bytes1 Length: " & b1.Length, csLogging.DebugLevel.Debug)
csLog.AddLog("bytes2 Length: " & b2.Length, csLogging.DebugLevel.Debug)
Dim fos As FileStream
' ### Write Bytes to file for testing
fos = New FileStream(Server.MapPath(".") & "\bytes1" & i, FileMode.Create)
fos.Write(b1, 0, b1.Length)
fos.Flush()
fos.Close()
' ### Write RawBytes to file for testing
fos = New FileStream(Server.MapPath(".") & "\bytes2" & i, FileMode.Create)
fos.Write(b2, 0, b2.Length)
fos.Flush()
fos.Close()
' ### CONVERSION ATTEMPTS
ConvertAttempt1(b2, i) ' ### Using Raw Bytes
ConvertAttempt2(stream, i)
End If
End If
Catch ex As Exception
End Try
Next
The first file using b1 appears to be a text representation of the PRStream
q
.160714 0 0 .160714 0 0 cm
0.00000 0.00000 0.00000 RG 0.00000 0.00000 0.00000 rg 1 J 1 j 26.48880 w 488.00000 115.43372 m 488.00000 115.43372 l S 20.37600 w 184.00000 155.43372 m 184.00000 155.43372 184.00000 155.43372 182.44000 156.45367 c S 20.37600 w
.....
.....
.....
which I assume are the vector graphic curves/strokes.
The output of b2 (raw bytes) has the same content length of the signatures I am expecting (3305 and 4834) as shown in the two sterams in iText RUPS.
I've attempted to convert the bytes to an image (JPG) but get errors
' ### Conversion attempt 1
Sub ConvertAttempt1(ByVal rawBytes As Byte(), ByVal xRef As Int16)
Try
Using memStream As MemoryStream = New MemoryStream(rawBytes)
memStream.Position = 0
' ###
' ###
' ### Falls over on next line - "Parameter is not valid"
' ###
' ###
Dim img As System.Drawing.Image = System.Drawing.Image.FromStream(memStream)
Dim path As String = System.IO.Path.Combine(Server.MapPath("."), String.Format("convert1_{0}.jpg", xRef))
Dim parms As System.Drawing.Imaging.EncoderParameters = New System.Drawing.Imaging.EncoderParameters(1)
parms.Param(0) = New System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0)
Dim jpegEncoder As System.Drawing.Imaging.ImageCodecInfo = GetImageEncoder("JPEG")
img.Save(path, jpegEncoder, parms)
End Using
Catch ex As Exception
'csLog.AddLog(ex.Message, csLogging.DebugLevel.Errors)
'csLog.AddLog(ex.StackTrace, csLogging.DebugLevel.Errors)
End Try
End Sub
and
Sub ConvertAttempt2(ByVal stream As PRStream, ByVal xRef As Int16)
' ### Conversion attempt 2
Try
' ###
' ###
' ### Falls over on next line - "Object reference not set to an instance of an object."
' ###
' ###
Dim pdfImage As PdfImageObject = New PdfImageObject(stream)
Dim img As System.Drawing.Image = pdfImage.GetDrawingImage()
Dim path As String = System.IO.Path.Combine(Server.MapPath("."), String.Format("convert2_{0}.jpg", xRef))
Dim parms As System.Drawing.Imaging.EncoderParameters = New System.Drawing.Imaging.EncoderParameters(1)
parms.Param(0) = New System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0)
Dim jpegEncoder As System.Drawing.Imaging.ImageCodecInfo = GetImageEncoder("JPEG")
img.Save(path, jpegEncoder, parms)
Catch ex As Exception
csLog.AddLog(ex.Message, csLogging.DebugLevel.Errors)
csLog.AddLog(ex.StackTrace, csLogging.DebugLevel.Errors)
End Try
End Sub