How to Convert pdf file to datatable - asp.net

Is there any way to convert PDF file to DataTable? The PDF file mainly consist of only tables any help will be highly appreciated.

using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
public DataTable ImportPDF(string Filename)
{
string strText = string.Empty;
List<string[]> list = new List<string[]>();
string[] PdfData = null;
try
{
PdfReader reader = new PdfReader((string)Filename);
for (int page = 1; page <= reader.NumberOfPages; page++)
{
ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
String cipherText = PdfTextExtractor.GetTextFromPage(reader, page, its);
cipherText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(cipherText)));
strText = strText + "\n" + cipherText;
PdfData = strText.Split('\n');
}
reader.Close();
}
catch (Exception ex)
{
}
List<string> temp = PdfData.ToList();
temp.RemoveAt(0);
list = temp.ConvertAll<string[]>(x => x.Split(' ').ToArray());
List<string> columns = list.FirstOrDefault().ToList();
DataTable dtTemp = new DataTable();
columns.All(x => { dtTemp.Columns.Add(new DataColumn(x)); return true; });
list.All(x => { dtTemp.Rows.Add(dtTemp.NewRow().ItemArray = x); return true; });
return dtTemp;
}

If the PDF contains marked content (you can see how to find this in my blog article http://www.jpedal.org/PDFblog/2010/09/the-easy-way-to-discover-if-a-pdf-file-contains-structured-content/) you can extract it from the PDF file. Otherwise you will need to extract the text and try to guess the structure.

Related

How to use Tempdata to display the list

I have did the excel upload in dotnet core .I had to use tempdata to retrieve the details of the excel in list.Instead in my below code i had used Static object to retrieve the list.My code works as like this ,when i click on upload button it will display the details in the excel sheet.and when click on save it will save it to database and i need to edit in grid view using ajax call also .Help me out
My Action in controller is
public async Task<IActionResult> ImportEmployeeDetails(IFormFile excelfile)
{
try
{
EmployeesViewModelList employeesListObject = new EmployeesViewModelList();
List<EmployeeModel> employeesViewModelList = new List<EmployeeModel>();
if (excelfile == null || excelfile.Length == 0)
{
return View(employeesListObject);
}
var supportedTypes = new[] { ".xls", ".xlsx" };
var ext = Path.GetExtension(excelfile.FileName);
if (!supportedTypes.Contains(ext))
{
return View(employeesListObject);
}
var path = Path.Combine(
Directory.GetCurrentDirectory(), "wwwroot",
"EmployeeDetails.xlsx");
if (System.IO.File.Exists(path))
{
System.IO.File.Delete(path);
}
using (var stream = new FileStream(path, FileMode.Create))
{
await excelfile.CopyToAsync(stream);
}
FileInfo file = new FileInfo(path);
using (ExcelPackage package = new ExcelPackage(file))
{
ExcelWorksheet worksheet = package.Workbook.Worksheets[1];
int rowCount = worksheet.Dimension.Rows;
int ColCount = worksheet.Dimension.Columns;
for (int i = 2; i <= rowCount; i++)
{
EmployeeModel emp = new EmployeeModel();
emp.EmployeeId = Convert.ToInt32(worksheet.Cells[i, 1].Value.ToString());
emp.EmpFirstName = worksheet.Cells[i, 2].Value.ToString();
employeesViewModelList.Add(emp);
}
employeesListObject.EmpModelList = employeesViewModelList;
return View(employeesListObject);
}
}
catch(Exception ex)
{
TempData["Message"] = "Opps! Something Went wrong!";
return RedirectToAction("ExcelPackage");
}
}
Try this, using your own list.
List<string> SomeList = new List<string>();
TempData["MyList"] = SomeList;
//then to get data just do
SomeList = TempData["MyList"] as List<string>; //This converts back to List<T>
Once you add the list to the TempData, you can retrive it from any Action or View in the same controller

Open XML generates only <NewDataSet /> tag from a full Excel file, but work for other files

I have an excel file full of data, and I'm trying to use Open XML SDK to convert this file into xml file.
I followed the documentation and other questions here on stackoverflow. However, the output of the xml file is always <NewDataSet />. Knowing that I tried other excel files and it worked fine.
Here is how my excel file looks like:
And here is my code "I tried two approaches":
The first approach is to use DataSet.GetXML(), it returned the same value as the next code does.
var xmlDS = new ConvertExcelToXml().GetXML(filePath);
string xmlPath = server.MapPath("~/UploadedFiles/XML/");
StreamWriter xmlFile = new StreamWriter(xmlPath + Path.GetFileNameWithoutExtension(fileName) + ".xml");
xmlDS.WriteXml(xmlFile);
//The used method to return the excel file dataset
public DataSet GetXML(string filename)
{
using (DataSet ds = new DataSet())
{
ds.Tables.Add(this.ReadExcelFile(filename));
return ds;
}
}
//This method used to return DataTable for previous method
private DataTable ReadExcelFile(string filename)
{
DataTable dt = new DataTable();
try
{
using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(filename, false))
{
WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
IEnumerable<Sheet> sheetcollection = spreadsheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string relationshipId = sheetcollection.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)spreadsheetDocument.WorkbookPart.GetPartById(relationshipId);
SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
IEnumerable<Row> rowcollection = sheetData.Descendants<Row>();
if (rowcollection.Count() == 0)
{
return dt;
}
foreach (Cell cell in rowcollection.ElementAt(0))
{
dt.Columns.Add(GetValueOfCell(spreadsheetDocument, cell));
}
foreach (Row row in rowcollection)
{
DataRow temprow = dt.NewRow();
int columnIndex = 0;
foreach (Cell cell in row.Descendants<Cell>())
{
int cellColumnIndex = GetColumnIndex(GetColumnName(cell.CellReference));
if (columnIndex < cellColumnIndex)
{
do
{
temprow[columnIndex] = string.Empty;
columnIndex++;
}
while (columnIndex < cellColumnIndex);
}
temprow[columnIndex] = GetValueOfCell(spreadsheetDocument, cell);
columnIndex++;
}
dt.Rows.Add(temprow);
}
}
dt.Rows.RemoveAt(0);
return dt;
}
catch (IOException ex)
{
throw new IOException(ex.Message);
}
}

bulk data export to excel and download in ashx file

I have an asp.net application in which I have a js file and an ashx file. Here in a download button click Im calling handler file in ajax call and retrieving sql table data in a json formatted string/data table and Im trying to export json formated string/data table to excel/csv file and download it. Please help me to find a solution. (Need a solution which help to export large amount of data and download)
I tried the below code but its not downloading excel file.
public void ProcessRequest(HttpContext context)
{
context.Response.AddHeader("content-disposition", "attachment; filename=FileName.xls");
context.Response.ContentType = "application/csv";
HttpResponse response = context.Response;
string exportContent = ExportToSpreadsheet(JsonStringToDataTable(GetDataFromTable()),'excelfilename');
response.Write(exportContent);
context.Response.End();
}
public DataTable JsonStringToDataTable(string jsonString)
{
DataTable dt = new DataTable();
string[] jsonStringArray = Regex.Split(jsonString.Replace("[", "").Replace("]", ""), "},{");
List<string> ColumnsName = new List<string>();
foreach (string jSA in jsonStringArray)
{
string[] jsonStringData = Regex.Split(jSA.Replace("{", "").Replace("}", ""), ",");
foreach (string ColumnsNameData in jsonStringData)
{
try
{
int idx = ColumnsNameData.IndexOf(":");
string ColumnsNameString = ColumnsNameData.Substring(0, idx - 1).Replace("\"", "");
if (!ColumnsName.Contains(ColumnsNameString))
{
ColumnsName.Add(ColumnsNameString);
}
}
catch (Exception ex)
{
//throw new Exception(string.Format(ex.Message + "Error Parsing Column Name : {0}", ColumnsNameData));
throw ex;
}
}
break;
}
foreach (string AddColumnName in ColumnsName)
{
dt.Columns.Add(AddColumnName);
}
foreach (string jSA in jsonStringArray)
{
string[] RowData = Regex.Split(jSA.Replace("{", "").Replace("}", ""), ",");
DataRow nr = dt.NewRow();
foreach (string rowData in RowData)
{
try
{
int idx = rowData.IndexOf(":");
string RowColumns = rowData.Substring(0, idx - 1).Replace("\"", "");
string RowDataString = rowData.Substring(idx + 1).Replace("\"", "");
nr[RowColumns] = RowDataString;
}
catch (Exception ex)
{
continue;
}
}
dt.Rows.Add(nr);
}
return dt;
}
private static string GetDataFromTable()
{
string returnValue = string.Empty;
var serializer = new JavaScriptSerializer { MaxJsonLength = Int32.MaxValue };
try
{
var result = //get data from sql table;
returnValue = serializer.Serialize(result);
}
catch (Exception e)
{
returnValue = serializer.Serialize(e.Message);
}
return returnValue;
}
public string ExportToSpreadsheet(DataTable table, string name)
{
string res = string.Empty;
try
{
//var resp = Response;
System.Web.HttpResponse resp = System.Web.HttpContext.Current.Response;
resp.Clear();
if (table != null)
{
foreach (DataColumn column in table.Columns)
{
resp.Write(column.ColumnName + ",");
}
}
resp.Write(Environment.NewLine);
if (table != null)
{
foreach (DataRow row in table.Rows)
{
for (int i = 0; i < table.Columns.Count; i++)
{
resp.Write(row[i].ToString().Replace(",", string.Empty) + ",");
}
resp.Write(Environment.NewLine);
}
}
res = "successfully downloaded";
resp.ContentType = "text/csv";
resp.AppendHeader("Content-Disposition", "attachment; filename=" + name + ".csv");
// resp.End();
}
catch(Exception ex)
{
res = ex.Message;
}
return res;
}
Start using a specialized libary like EPPlus. It will create real Excel files.
private void exportToExcel(DataTable dataTable)
{
using (ExcelPackage excelPackage = new ExcelPackage())
{
//create the worksheet
ExcelWorksheet worksheet = excelPackage.Workbook.Worksheets.Add("Sheet 1");
//load the datatable into the sheet, with headers
worksheet.Cells["A1"].LoadFromDataTable(dataTable, true);
//send the file to the browser
byte[] bin = excelPackage.GetAsByteArray();
Response.ClearHeaders();
Response.Clear();
Response.Buffer = true;
Response.ContentType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
Response.AddHeader("content-length", bin.Length.ToString());
Response.AddHeader("content-disposition", "attachment; filename=\"ExcelDemo.xlsx\"");
Response.OutputStream.Write(bin, 0, bin.Length);
Response.Flush();
HttpContext.Current.ApplicationInstance.CompleteRequest();
}
}

How do I change the format of a specific column in EPPlus?

I've used EPPlus to download my datatable from my website / database to an Excel sheet and the first picture is the result I get. The second picture is what I would like it to be.
Questions:
How do I change the format of my Timestamp to "time"?
Obviously title would still be a string format.
How do I make the width of the columns to match the longest word inside?
So that 80% of the message isn't hidden and you have to drag the column out to read the entire message.
Edit: Completely forgot to add my code...
public ActionResult ExportData()
{
DataTable dataTable = GetData();
using (ExcelPackage package = new ExcelPackage())
{
var ws = package.Workbook.Worksheets.Add("My Sheet");
//true generates headers
ws.Cells["1:1"].Style.Font.Bold = true;
ws.Cells["A1"].LoadFromDataTable(dataTable, true);
ws.Cells[ws.Dimension.Address].AutoFitColumns();
var stream = new MemoryStream();
package.SaveAs(stream);
string fileName = "Log.xlsx";
string contentType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
stream.Position = 0;
return File(stream, contentType, fileName);
}
}
public DataTable GetData()
{
DataTable dt = new DataTable();
if (ModelState.IsValid)
{
using (SqlConnection conn = new SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["MySqlConnection"].ConnectionString))
{
using (SqlCommand comm = conn.CreateCommand())
{
comm.Parameters.AddWithValue("#val1", Session["myID"]);
comm.Parameters.AddWithValue("#val2", "%" + Session["mySearchString"] + "%");
comm.CommandText = "SELECT * FROM dbo.Log WHERE CustomerId = #val1 AND Message LIKE #val2";
try
{
conn.Open();
dt.Load(comm.ExecuteReader());
}
catch (SqlException e)
{
throw new Exception(e.ToString());
}
}
}
}
return dt;
}
Just need to set the Numberformat.Format string. Like this:
ws.Column(2).Style.Numberformat.Format = "hh:mm:ss";
If you want to customize the actual just there are plenty of resource online like http://www.ozgrid.com/Excel/excel-custom-number-formats.htm. Or you can just open it in excel, set the format to Custom and experiment with the string.

Large text reading in asp.net

This is my asp.net code behind:
public string ReadJSON(string jsonPath)
{
FileStream fs = new FileStream(jsonPath, FileMode.Open, FileAccess.Read);
StreamReader sr = new StreamReader(fs);
string WillReturn = "";
try
{
WillReturn = sr.ReadToEnd();
return WillReturn;
}
catch (Exception ex)
{
WillReturn = null;
return WillReturn;
}
finally { sr.Close(); fs.Dispose(); }
}
But my data is 128 mb. And I am not taking error but no read.
I tried to debuging. WillReturn = sr.ReadToEnd(); context is: WillReturn could not evaluate expression.
How can I read to this?
It can read 127Mb text file into rows in 2 minutes 30++ seconds. Try this sample code
strFileName = ViewState("Physical path");
StreamReader sr = new StreamReader(strFileName);
do {
line = sr.ReadLine();
if ((line != null)) {
result = line.Split(Convert.ToChar(Constants.vbTab));
icount += 1;
dr = ds1.Tables(0).NewRow;
dr.BeginEdit();
dr("Item1") = result(0);
ds1.Tables(0).Rows.Add(dr);
}
} while (!(line == null));

Resources