Grabbing a timesheet HTMLAgilityPack
See the question and my original answer on StackOverflowNot sure I fully understand what you want to do but here is a sample code that should help you get started. I strongly suggest you have a look at XPATH to understand it.
HtmlDocument doc = new HtmlDocument();
doc.Load(yourFile);
// get all TR with a specific class name, starting from root (/), and recursively (//)
foreach (HtmlNode node in doc.DocumentNode.SelectNodes("//tr[@class='tblDataGreyNH' or @class='tblDataWhiteNH']"))
{
// get all TD below the current node with a specific class name
HtmlNode inOrOut = node.SelectSingleNode("td[@class='tblHeader']");
if (inOrOut != null)
{
string io = inOrOut.InnerText.Trim();
Console.WriteLine(io.ToUpper());
if (io.Contains("Time"))
{
// normalize-space gets rid or whitespaces (\r,\n, etc.)
// text() gets the node's inner text
foreach (HtmlNode td in node.SelectNodes("td[normalize-space(@class)='' and normalize-space(text())!='' and normalize-space(text())!='00:00']"))
{
Console.WriteLine("value:" + td.InnerText.Trim());
}
}
}
// gets all TD below the current node that define the NOWRAP attribute
HtmlNodeCollection tdNoWraps = node.SelectNodes("td[@nowrap]");
if (tdNoWraps != null)
{
foreach (HtmlNode tdNoWrap in tdNoWraps)
{
string value = tdNoWrap.InnerText.Trim();
if (value == string.Empty)
continue;
Console.WriteLine("value:" + value);
}
}
}
It will output this from your sample page:
IN
value:7:47
value:7:46
value:7:45
value:7:51
OUT
value:15:35
value:15:33
value:12:38
value:8:59
IN
value:12:38
value:8:59
OUT
value:15:35
TOTAL TIME
value:07:48
value:07:47
value:07:50
value:01:08
REGULAR TIME
value:07:48
value:07:47
value:07:50
value:01:08