Is there any way to convert a PDF file to a DataTable? The PDF file consists mainly of tables. Any help will be highly appreciated.
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
/// <summary>
/// Extracts the text of every page of a PDF and loads it into a <see cref="DataTable"/>,
/// treating each text line as a row and each space-separated token as a cell.
/// </summary>
/// <param name="Filename">Path of the PDF file to read.</param>
/// <returns>
/// A table whose column names are the tokens of the first extracted line. Every extracted
/// line (including that first one) is added as a row. If nothing could be read, an empty
/// table is returned.
/// </returns>
/// <remarks>
/// Extraction is best-effort: if reading fails part-way through, the failure is traced and
/// the lines collected from the pages read so far are still returned.
/// </remarks>
public DataTable ImportPDF(string Filename)
{
    List<string> lines = new List<string>();
    PdfReader reader = null;
    try
    {
        reader = new PdfReader(Filename);
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();
            string pageText = PdfTextExtractor.GetTextFromPage(reader, page, strategy);
            // NOTE(review): this round-trip through Encoding.Default is kept from the original;
            // it looks lossy for characters outside the default code page - confirm it is wanted.
            pageText = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(pageText)));
            // Collect this page's lines directly instead of re-splitting the whole
            // accumulated text after every page.
            lines.AddRange(pageText.Split('\n'));
        }
    }
    catch (Exception ex)
    {
        // Previously swallowed silently (and followed by a NullReferenceException when
        // nothing had been read). Keep the best-effort contract but leave a trace.
        System.Diagnostics.Trace.TraceError("ImportPDF: failed to read '" + Filename + "': " + ex);
    }
    finally
    {
        // Release the file even when extraction throws.
        if (reader != null)
        {
            reader.Close();
        }
    }

    DataTable table = new DataTable();
    if (lines.Count == 0)
    {
        return table;
    }

    List<string[]> rows = lines.ConvertAll(line => line.Split(' '));

    // The first extracted line supplies the column names.
    foreach (string header in rows[0])
    {
        table.Columns.Add(new DataColumn(header));
    }

    foreach (string[] cells in rows)
    {
        // A line with more tokens than the header would make Rows.Add throw;
        // grow the table with auto-named columns instead of failing.
        while (table.Columns.Count < cells.Length)
        {
            table.Columns.Add();
        }
        table.Rows.Add((object[])cells);
    }
    return table;
}
If the PDF contains marked content (you can see how to find this in my blog article http://www.jpedal.org/PDFblog/2010/09/the-easy-way-to-discover-if-a-pdf-file-contains-structured-content/) you can extract it from the PDF file. Otherwise you will need to extract the text and try to guess the structure.
Related
I have implemented an Excel upload in .NET Core. I need to use TempData to retrieve the details of the Excel file as a list; instead, in my code below I have used a static object to retrieve the list. My code works like this: when I click the upload button, it displays the details from the Excel sheet, and when I click save, it saves them to the database. I also need to edit the rows in a grid view using an AJAX call. Please help me out.
My Action in controller is
/// <summary>
/// Receives an uploaded Excel workbook, saves it as wwwroot/EmployeeDetails.xlsx and reads
/// the employee id (column 1) and first name (column 2) from every row after the header row.
/// </summary>
/// <param name="excelfile">The uploaded file; may be null or empty.</param>
/// <returns>
/// The view with the parsed employees (an empty model when no file or an unsupported
/// extension was supplied), or a redirect to "ExcelPackage" with an error message in
/// TempData when anything fails.
/// </returns>
public async Task<IActionResult> ImportEmployeeDetails(IFormFile excelfile)
{
    try
    {
        EmployeesViewModelList employeesListObject = new EmployeesViewModelList();
        List<EmployeeModel> employeesViewModelList = new List<EmployeeModel>();

        if (excelfile == null || excelfile.Length == 0)
        {
            return View(employeesListObject);
        }

        // Compare case-insensitively so "REPORT.XLSX" is accepted as well.
        // NOTE(review): ".xls" is accepted here as in the original, but EPPlus may not be
        // able to open the legacy binary format - confirm, otherwise such uploads end in
        // the catch block below.
        var supportedTypes = new[] { ".xls", ".xlsx" };
        var ext = Path.GetExtension(excelfile.FileName);
        if (!supportedTypes.Contains(ext, StringComparer.OrdinalIgnoreCase))
        {
            return View(employeesListObject);
        }

        var path = Path.Combine(
            Directory.GetCurrentDirectory(), "wwwroot",
            "EmployeeDetails.xlsx");

        // FileMode.Create truncates an existing file, so no separate Exists/Delete is needed.
        using (var stream = new FileStream(path, FileMode.Create))
        {
            await excelfile.CopyToAsync(stream);
        }

        FileInfo file = new FileInfo(path);
        using (ExcelPackage package = new ExcelPackage(file))
        {
            // Take the first sheet by enumeration rather than by a numeric index.
            // NOTE(review): the index base of Worksheets[...] appears to differ between
            // EPPlus versions/targets - confirm for the version in use.
            ExcelWorksheet worksheet = package.Workbook.Worksheets.FirstOrDefault();

            // Dimension is null for a sheet without any cells.
            if (worksheet != null && worksheet.Dimension != null)
            {
                // Rows are addressed by absolute index, so loop up to the last used row.
                int lastRow = worksheet.Dimension.End.Row;
                for (int i = 2; i <= lastRow; i++)
                {
                    object idValue = worksheet.Cells[i, 1].Value;
                    if (idValue == null)
                    {
                        // Blank row inside the used range: nothing to import.
                        continue;
                    }

                    EmployeeModel emp = new EmployeeModel();
                    emp.EmployeeId = Convert.ToInt32(idValue.ToString());
                    // Convert.ToString yields an empty string for an empty cell
                    // instead of throwing on a null Value.
                    emp.EmpFirstName = Convert.ToString(worksheet.Cells[i, 2].Value);
                    employeesViewModelList.Add(emp);
                }
            }

            employeesListObject.EmpModelList = employeesViewModelList;
            return View(employeesListObject);
        }
    }
    catch (Exception)
    {
        TempData["Message"] = "Opps! Something Went wrong!";
        return RedirectToAction("ExcelPackage");
    }
}
Try this, using your own list.
// The list that has to survive until a later request.
var SomeList = new List<string>();

// Park it in TempData under a well-known key.
TempData["MyList"] = SomeList;

// Later on: fetch the entry and turn it back into a List<string>.
// The "as" cast produces null when the stored value is not a List<string>.
object storedList = TempData["MyList"];
SomeList = storedList as List<string>;
Once you add the list to TempData, you can retrieve it from any action or view in the same controller.
I have an Excel file full of data, and I'm trying to use the Open XML SDK to convert this file into an XML file.
I followed the documentation and other questions here on Stack Overflow. However, the output XML file is always <NewDataSet />. Note that I tried other Excel files and they worked fine.
Here is what my Excel file looks like:
And here is my code (I tried two approaches):
The first approach was to use DataSet.GetXML(); it returned the same value as the following code does.
// Convert the workbook to a DataSet and write it as XML next to the other uploads.
var xmlDS = new ConvertExcelToXml().GetXML(filePath);
string xmlPath = server.MapPath("~/UploadedFiles/XML/");
string xmlFilePath = Path.Combine(xmlPath, Path.GetFileNameWithoutExtension(fileName) + ".xml");
// The writer must be disposed so its buffer is flushed to disk and the file handle is
// released; without this the output can be empty or truncated.
using (StreamWriter xmlFile = new StreamWriter(xmlFilePath))
{
    xmlDS.WriteXml(xmlFile);
}
//The used method to return the excel file dataset
/// <summary>
/// Reads the given Excel file into a table and wraps that table in a new
/// <see cref="DataSet"/>.
/// </summary>
/// <param name="filename">Path of the Excel file to read.</param>
/// <returns>A data set containing the single table read from the file. The caller owns it.</returns>
public DataSet GetXML(string filename)
{
    // Do not wrap the data set in a using block: it is handed to the caller, and a
    // using block would dispose it before the caller ever sees it.
    DataSet ds = new DataSet();
    try
    {
        ds.Tables.Add(this.ReadExcelFile(filename));
        return ds;
    }
    catch
    {
        // Nothing is returned on failure, so clean up here and let the error propagate.
        ds.Dispose();
        throw;
    }
}
//This method used to return DataTable for previous method
/// <summary>
/// Loads the first sheet of an Excel workbook into a <see cref="DataTable"/>.
/// The first row supplies the column names; every following row becomes a data row.
/// </summary>
/// <param name="filename">Path of the workbook, opened read-only.</param>
/// <returns>The populated table, or an empty table when the sheet has no rows.</returns>
/// <exception cref="IOException">
/// Propagated unchanged (original type, message and stack trace) when the file cannot be read.
/// </exception>
private DataTable ReadExcelFile(string filename)
{
    DataTable dt = new DataTable();

    // No catch/re-wrap of IOException here: wrapping it in a new IOException only
    // discarded the stack trace and the concrete exception subtype.
    using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(filename, false))
    {
        WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
        Sheet firstSheet = workbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>().First();
        WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(firstSheet.Id.Value);
        SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();

        // Materialize once; the lazy sequence was previously walked several times.
        List<Row> rows = sheetData.Descendants<Row>().ToList();
        if (rows.Count == 0)
        {
            return dt;
        }

        // Header row: one column per cell.
        foreach (Cell cell in rows[0].Elements<Cell>())
        {
            dt.Columns.Add(GetValueOfCell(spreadsheetDocument, cell));
        }

        // Data rows: skip the header instead of adding it and removing it afterwards.
        foreach (Row row in rows.Skip(1))
        {
            // Values in column order; the list length plays the role of the running
            // column index.
            List<string> values = new List<string>();
            foreach (Cell cell in row.Elements<Cell>())
            {
                // presumably GetColumnIndex yields a zero-based column position - verify against the helper
                int cellColumnIndex = GetColumnIndex(GetColumnName(cell.CellReference));

                // Pad the columns that have no cell element in this row.
                while (values.Count < cellColumnIndex)
                {
                    values.Add(string.Empty);
                }
                values.Add(GetValueOfCell(spreadsheetDocument, cell));
            }

            // A row wider than the header used to throw IndexOutOfRangeException;
            // add auto-named columns so the extra cells are kept.
            while (dt.Columns.Count < values.Count)
            {
                dt.Columns.Add();
            }

            DataRow temprow = dt.NewRow();
            for (int i = 0; i < values.Count; i++)
            {
                temprow[i] = values[i];
            }
            dt.Rows.Add(temprow);
        }
    }

    return dt;
}
I have an ASP.NET application with a JS file and an .ashx handler file. On a download button click, I call the handler via an AJAX call, retrieve SQL table data as a JSON-formatted string / DataTable, and try to export it to an Excel/CSV file and download it. Please help me find a solution. (I need a solution that can export and download a large amount of data.)
I tried the code below, but it's not downloading the Excel file.
/// <summary>
/// Handles the download request: loads the table data as JSON, converts it to a
/// <see cref="DataTable"/> and streams it to the client as a CSV attachment.
/// </summary>
/// <param name="context">The current HTTP context.</param>
public void ProcessRequest(HttpContext context)
{
    DataTable table = JsonStringToDataTable(GetDataFromTable());

    // ExportToSpreadsheet writes the CSV body to the current response and sets the
    // content type and Content-Disposition header itself. Setting them here as well
    // produced a second Content-Disposition header, so this method no longer does.
    // The file name is a string literal ('excelfilename' is not a valid char literal).
    string status = ExportToSpreadsheet(table, "excelfilename");

    // The returned status text is not part of the file, so it is traced instead of
    // being appended to the CSV body.
    System.Diagnostics.Trace.TraceInformation("CSV export: " + status);

    context.Response.End();
}
/// <summary>
/// Converts a flat JSON array of objects (for example <c>[{"Id":"1","Name":"Ann"}]</c>)
/// into a <see cref="DataTable"/> using simple string splitting.
/// </summary>
/// <param name="jsonString">The JSON text; null, empty or "[]" yields an empty table.</param>
/// <returns>
/// A table whose columns are the keys of the first object and whose rows hold the values
/// of every object as strings. Keys that the first object does not have are ignored.
/// </returns>
/// <remarks>
/// This is not a real JSON parser: nested objects/arrays and values containing commas,
/// brackets or braces are not supported. Fragments without a "key:value" shape are skipped.
/// </remarks>
public DataTable JsonStringToDataTable(string jsonString)
{
    DataTable dt = new DataTable();
    if (string.IsNullOrEmpty(jsonString))
    {
        return dt;
    }

    string[] records = Regex.Split(jsonString.Replace("[", "").Replace("]", ""), "},{");

    // The first record defines the columns.
    foreach (string fragment in SplitRecord(records[0]))
    {
        string key;
        string value;
        if (TryParsePair(fragment, out key, out value) && !dt.Columns.Contains(key))
        {
            dt.Columns.Add(key);
        }
    }

    foreach (string record in records)
    {
        DataRow row = dt.NewRow();
        bool parsedAny = false;
        foreach (string fragment in SplitRecord(record))
        {
            string key;
            string value;
            if (!TryParsePair(fragment, out key, out value))
            {
                continue;
            }
            parsedAny = true;
            if (dt.Columns.Contains(key))
            {
                row[key] = value;
            }
        }

        // A record without a single key/value pair (e.g. the input "[]") is not a row.
        if (parsedAny)
        {
            dt.Rows.Add(row);
        }
    }
    return dt;
}

/// <summary>Strips the braces from one record and splits it into "key:value" fragments.</summary>
private static string[] SplitRecord(string record)
{
    return record.Replace("{", "").Replace("}", "").Split(',');
}

/// <summary>
/// Splits a fragment at its first colon into an unquoted key and an unquoted value.
/// Returns false when there is no colon or the key is empty.
/// </summary>
private static bool TryParsePair(string fragment, out string key, out string value)
{
    key = null;
    value = null;

    int idx = fragment.IndexOf(':');
    if (idx <= 0)
    {
        return false;
    }

    // Take everything before the colon and strip the quotes; cutting one character
    // early (idx - 1) truncated keys that were not quoted.
    key = fragment.Substring(0, idx).Replace("\"", "").Trim();
    value = fragment.Substring(idx + 1).Replace("\"", "");
    return key.Length > 0;
}
// Serializes the data read from the SQL table to a JSON string.
// Returns the serialized data, or - if anything throws - the exception message
// serialized as a JSON string.
// NOTE(review): a caller cannot tell a serialized error message from real data by
// looking at the return value alone.
private static string GetDataFromTable()
{
string returnValue = string.Empty;
// MaxJsonLength is raised to Int32.MaxValue for the serializer used below.
var serializer = new JavaScriptSerializer { MaxJsonLength = Int32.MaxValue };
try
{
// NOTE(review): the next line is a placeholder; the actual query is not shown and
// the statement does not compile as written.
var result = //get data from sql table;
returnValue = serializer.Serialize(result);
}
catch (Exception e)
{
// The error text, not the data, is returned in this case.
returnValue = serializer.Serialize(e.Message);
}
return returnValue;
}
/// <summary>
/// Writes <paramref name="table"/> to the current HTTP response as a CSV attachment.
/// </summary>
/// <param name="table">The data to export; when null only an empty line is written.</param>
/// <param name="name">File name (without extension) offered to the browser.</param>
/// <returns>
/// "successfully downloaded" on success, otherwise the message of the exception that occurred.
/// </returns>
public string ExportToSpreadsheet(DataTable table, string name)
{
    string res = string.Empty;
    try
    {
        System.Web.HttpResponse resp = System.Web.HttpContext.Current.Response;
        resp.Clear();

        // Set the headers before any body content is written.
        resp.ContentType = "text/csv";
        resp.AppendHeader("Content-Disposition", "attachment; filename=" + name + ".csv");

        if (table != null)
        {
            string[] fields = new string[table.Columns.Count];

            // Header line.
            for (int i = 0; i < fields.Length; i++)
            {
                fields[i] = EscapeCsvField(table.Columns[i].ColumnName);
            }
            resp.Write(string.Join(",", fields));
            resp.Write(Environment.NewLine);

            // One line per row. Values are quoted when necessary instead of having
            // their commas removed, and no trailing comma is emitted.
            foreach (DataRow row in table.Rows)
            {
                for (int i = 0; i < fields.Length; i++)
                {
                    fields[i] = EscapeCsvField(row[i].ToString());
                }
                resp.Write(string.Join(",", fields));
                resp.Write(Environment.NewLine);
            }
        }
        else
        {
            resp.Write(Environment.NewLine);
        }

        res = "successfully downloaded";
    }
    catch (Exception ex)
    {
        res = ex.Message;
    }
    return res;
}

/// <summary>
/// Quotes a CSV field when it contains a comma, a double quote or a line break,
/// doubling any embedded double quotes.
/// </summary>
private static string EscapeCsvField(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return string.Empty;
    }
    if (value.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
    {
        return value;
    }
    return "\"" + value.Replace("\"", "\"\"") + "\"";
}
Start using a specialized library like EPPlus. It will create real Excel files.
/// <summary>
/// Builds an .xlsx workbook with a single sheet holding <paramref name="dataTable"/>
/// (including a header row) and sends it to the browser as a download.
/// </summary>
/// <param name="dataTable">The data to place on the sheet, starting at cell A1.</param>
private void exportToExcel(DataTable dataTable)
{
    byte[] workbookBytes;

    using (var workbook = new ExcelPackage())
    {
        // Single sheet; "true" asks EPPlus to emit the column names as the first row.
        var sheet = workbook.Workbook.Worksheets.Add("Sheet 1");
        sheet.Cells["A1"].LoadFromDataTable(dataTable, true);

        workbookBytes = workbook.GetAsByteArray();
    }

    // Start from a clean, buffered response and describe the payload.
    Response.ClearHeaders();
    Response.Clear();
    Response.Buffer = true;
    Response.ContentType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    Response.AddHeader("content-length", workbookBytes.Length.ToString());
    Response.AddHeader("content-disposition", "attachment; filename=\"ExcelDemo.xlsx\"");

    // Push the bytes and finish the request.
    Response.OutputStream.Write(workbookBytes, 0, workbookBytes.Length);
    Response.Flush();
    HttpContext.Current.ApplicationInstance.CompleteRequest();
}
I've used EPPlus to download my datatable from my website / database to an Excel sheet and the first picture is the result I get. The second picture is what I would like it to be.
Questions:
How do I change the format of my Timestamp to "time"?
Obviously title would still be a string format.
How do I make the width of the columns to match the longest word inside?
So that 80% of the message isn't hidden and you have to drag the column out to read the entire message.
Edit: Completely forgot to add my code...
/// <summary>
/// Exports the rows returned by <c>GetData()</c> as an Excel workbook named "Log.xlsx".
/// </summary>
/// <returns>A file result streaming the generated workbook.</returns>
public ActionResult ExportData()
{
    const string contentType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    const string fileName = "Log.xlsx";

    DataTable logRows = GetData();

    using (var workbook = new ExcelPackage())
    {
        ExcelWorksheet sheet = workbook.Workbook.Worksheets.Add("My Sheet");

        // Bold first row, then load the data with a header row and size the columns.
        sheet.Cells["1:1"].Style.Font.Bold = true;
        sheet.Cells["A1"].LoadFromDataTable(logRows, true);
        sheet.Cells[sheet.Dimension.Address].AutoFitColumns();

        // Serialize into memory and rewind so the result is read from the start.
        var output = new MemoryStream();
        workbook.SaveAs(output);
        output.Position = 0;

        return File(output, contentType, fileName);
    }
}
/// <summary>
/// Loads the log entries of the current customer whose message contains the current
/// search string.
/// </summary>
/// <returns>
/// The matching rows of dbo.Log, or an empty table when the model state is invalid.
/// </returns>
/// <exception cref="SqlException">
/// Propagated unchanged when the query fails (it was previously rewrapped in a plain
/// <see cref="Exception"/>, losing its type and stack trace).
/// </exception>
public DataTable GetData()
{
    DataTable dt = new DataTable();
    if (!ModelState.IsValid)
    {
        return dt;
    }

    using (SqlConnection conn = new SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["MySqlConnection"].ConnectionString))
    using (SqlCommand comm = conn.CreateCommand())
    {
        // SQL Server parameter markers start with '@'; '#' denotes a temporary object
        // and makes the statement invalid.
        comm.CommandText = "SELECT * FROM dbo.Log WHERE CustomerId = @val1 AND Message LIKE @val2";

        // A null value would make the parameter be treated as missing; send DBNull instead.
        comm.Parameters.AddWithValue("@val1", Session["myID"] ?? DBNull.Value);
        comm.Parameters.AddWithValue("@val2", "%" + Session["mySearchString"] + "%");

        conn.Open();
        // Dispose the reader as soon as the table is filled.
        using (SqlDataReader reader = comm.ExecuteReader())
        {
            dt.Load(reader);
        }
    }
    return dt;
}
Just need to set the Numberformat.Format string. Like this:
// Apply a time-only display format to the second column of the sheet.
// NOTE(review): assumes column 2 is the timestamp column - adjust the index to your data.
var formattedColumn = ws.Column(2);
formattedColumn.Style.Numberformat.Format = "hh:mm:ss";
If you want to customize the actual format, there are plenty of resources online, such as http://www.ozgrid.com/Excel/excel-custom-number-formats.htm. Or you can just open the file in Excel, set the format to Custom and experiment with the string.
This is my asp.net code behind:
/// <summary>
/// Reads the whole file at <paramref name="jsonPath"/> into a string.
/// </summary>
/// <param name="jsonPath">Path of the file to read.</param>
/// <returns>
/// The complete file content, or null when reading the opened file fails.
/// Errors raised while opening the file are not caught and reach the caller.
/// </returns>
public string ReadJSON(string jsonPath)
{
    using (var input = new FileStream(jsonPath, FileMode.Open, FileAccess.Read))
    using (var reader = new StreamReader(input))
    {
        try
        {
            return reader.ReadToEnd();
        }
        catch (Exception)
        {
            // Any failure while reading is reported to the caller as null.
            return null;
        }
    }
}
But my data is 128 MB. I am not getting an error, but nothing is read.
I tried debugging. At WillReturn = sr.ReadToEnd(); the debugger shows: WillReturn could not evaluate expression.
How can I read this file?
This can read a 127 MB text file into rows in a little over 2 minutes 30 seconds. Try this sample code:
// Read the tab-separated file one line at a time and add one row per line, so the
// whole file never has to be held in a single string.
// C# uses square brackets for indexers and parentheses for method calls.
strFileName = (string)ViewState["Physical path"];
using (StreamReader sr = new StreamReader(strFileName))
{
    // ReadLine returns null at the end of the file.
    while ((line = sr.ReadLine()) != null)
    {
        // '\t' is the tab character, so the Visual Basic Constants class is not needed.
        result = line.Split('\t');
        icount += 1;

        dr = ds1.Tables[0].NewRow();
        dr.BeginEdit();
        dr["Item1"] = result[0];
        ds1.Tables[0].Rows.Add(dr);
    }
}