To read a Word file, use NPOI.dll. Install it from NuGet and add a reference to it in your project. The code for reading the file is below.
// Primary extraction path: parse the document with NPOI's XWPF API and append
// the text of every paragraph in wDoc.Paragraphs to resumeText, lower-cased.
try
{
    XWPFDocument wDoc;

    // Request read access only: the file is just parsed here, and the
    // FileStream(path, FileMode.Open) overload asks for read/write access,
    // which fails on read-only files.
    using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
    {
        wDoc = new XWPFDocument(fs);
    }

    // Seed the builder with whatever resumeText already holds so the result
    // matches plain concatenation, without re-allocating the string for
    // every paragraph.
    StringBuilder text = new StringBuilder(resumeText);
    foreach (XWPFParagraph prg in wDoc.Paragraphs)
    {
        text.Append(prg.Text);
    }

    resumeText = text.ToString().ToLower();
}
catch (Exception)
{
    // Deliberate best-effort: any failure while reading with NPOI leaves
    // resumeText untouched, so callers can detect an empty result and try
    // another reader instead of crashing here.
}
If the Word file is still not supported by this DLL (that is, you get an error while reading the document), use the code below instead. It uses DocumentFormat.OpenXml.dll; add the package from NuGet.
// Fallback: when nothing has been extracted into resumeText so far, read the
// document through the Open XML SDK instead.
if (String.IsNullOrEmpty(resumeText))
{
    // Lower-case the fallback text as well, so resumeText has the same
    // casing regardless of which reader produced it (the NPOI path
    // lower-cases its result).
    resumeText = ReadWordDocument(fileName).ToLower();
}
/// <summary>
/// Extracts the plain text of a Word document's body using the Open XML SDK.
/// </summary>
/// <param name="filepath">Path of the Word document to read.</param>
/// <returns>
/// The text produced by <c>GetPlainText</c> for the document body, or an empty
/// string when the document has no main part, no document root, or no body.
/// </returns>
public string ReadWordDocument(string filepath)
{
    // Open read-only (isEditable: false) because nothing is modified, and
    // dispose the package so the underlying file handle is released even
    // when extraction throws.
    using (WordprocessingDocument package = WordprocessingDocument.Open(filepath, false))
    {
        var mainPart = package.MainDocumentPart;
        if (mainPart == null || mainPart.Document == null)
        {
            return string.Empty;
        }

        OpenXmlElement element = mainPart.Document.Body;
        if (element == null)
        {
            return string.Empty;
        }

        // The text is fully materialized as a string before the package is
        // disposed at the end of the using block.
        return GetPlainText(element);
    }
}
/// <summary>
/// Recursively converts the child elements of an Open XML element to plain text.
/// </summary>
/// <param name="element">The element whose children are rendered as text.</param>
/// <returns>
/// The concatenated text of all descendants: "t" elements contribute their
/// inner text, "cr" and "br" a line break, "tab" a tab character; every other
/// element contributes the text of its own children, and a "p" element is
/// additionally followed by two line breaks.
/// </returns>
public string GetPlainText(OpenXmlElement element)
{
    StringBuilder text = new StringBuilder();

    foreach (OpenXmlElement child in element.Elements())
    {
        string tag = child.LocalName;

        if (tag == "t")
        {
            // Text content.
            text.Append(child.InnerText);
        }
        else if (tag == "cr" || tag == "br")
        {
            // Carriage return or break element.
            text.Append(Environment.NewLine);
        }
        else if (tag == "tab")
        {
            text.Append("\t");
        }
        else
        {
            // Containers (paragraphs, runs, tables, ...) are flattened by
            // descending into their children.
            text.Append(GetPlainText(child));

            if (tag == "p")
            {
                // A paragraph ends with two line breaks, leaving a blank
                // line between consecutive paragraphs.
                text.AppendLine().AppendLine();
            }
        }
    }

    return text.ToString();
}
// Primary extraction path: parse the document with NPOI's XWPF API and append
// the text of every paragraph in wDoc.Paragraphs to resumeText, lower-cased.
try
{
    XWPFDocument wDoc;

    // Request read access only: the file is just parsed here, and the
    // FileStream(path, FileMode.Open) overload asks for read/write access,
    // which fails on read-only files.
    using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
    {
        wDoc = new XWPFDocument(fs);
    }

    // Seed the builder with whatever resumeText already holds so the result
    // matches plain concatenation, without re-allocating the string for
    // every paragraph.
    StringBuilder text = new StringBuilder(resumeText);
    foreach (XWPFParagraph prg in wDoc.Paragraphs)
    {
        text.Append(prg.Text);
    }

    resumeText = text.ToString().ToLower();
}
catch (Exception)
{
    // Deliberate best-effort: any failure while reading with NPOI leaves
    // resumeText untouched, so callers can detect an empty result and try
    // another reader instead of crashing here.
}
If the Word file is still not supported by this DLL (that is, you get an error while reading the document), use the code below instead. It uses DocumentFormat.OpenXml.dll; add the package from NuGet.
// Fallback: when nothing has been extracted into resumeText so far, read the
// document through the Open XML SDK instead.
if (String.IsNullOrEmpty(resumeText))
{
    // Lower-case the fallback text as well, so resumeText has the same
    // casing regardless of which reader produced it (the NPOI path
    // lower-cases its result).
    resumeText = ReadWordDocument(fileName).ToLower();
}
/// <summary>
/// Extracts the plain text of a Word document's body using the Open XML SDK.
/// </summary>
/// <param name="filepath">Path of the Word document to read.</param>
/// <returns>
/// The text produced by <c>GetPlainText</c> for the document body, or an empty
/// string when the document has no main part, no document root, or no body.
/// </returns>
public string ReadWordDocument(string filepath)
{
    // Open read-only (isEditable: false) because nothing is modified, and
    // dispose the package so the underlying file handle is released even
    // when extraction throws.
    using (WordprocessingDocument package = WordprocessingDocument.Open(filepath, false))
    {
        var mainPart = package.MainDocumentPart;
        if (mainPart == null || mainPart.Document == null)
        {
            return string.Empty;
        }

        OpenXmlElement element = mainPart.Document.Body;
        if (element == null)
        {
            return string.Empty;
        }

        // The text is fully materialized as a string before the package is
        // disposed at the end of the using block.
        return GetPlainText(element);
    }
}
/// <summary>
/// Recursively converts the child elements of an Open XML element to plain text.
/// </summary>
/// <param name="element">The element whose children are rendered as text.</param>
/// <returns>
/// The concatenated text of all descendants: "t" elements contribute their
/// inner text, "cr" and "br" a line break, "tab" a tab character; every other
/// element contributes the text of its own children, and a "p" element is
/// additionally followed by two line breaks.
/// </returns>
public string GetPlainText(OpenXmlElement element)
{
    StringBuilder text = new StringBuilder();

    foreach (OpenXmlElement child in element.Elements())
    {
        string tag = child.LocalName;

        if (tag == "t")
        {
            // Text content.
            text.Append(child.InnerText);
        }
        else if (tag == "cr" || tag == "br")
        {
            // Carriage return or break element.
            text.Append(Environment.NewLine);
        }
        else if (tag == "tab")
        {
            text.Append("\t");
        }
        else
        {
            // Containers (paragraphs, runs, tables, ...) are flattened by
            // descending into their children.
            text.Append(GetPlainText(child));

            if (tag == "p")
            {
                // A paragraph ends with two line breaks, leaving a blank
                // line between consecutive paragraphs.
                text.AppendLine().AppendLine();
            }
        }
    }

    return text.ToString();
}
No comments:
Post a Comment