Removing html links but leaving the text
Recently I needed to write a routine that removes the <A> tags from a piece of HTML while leaving the link text itself untouched.
So <a href='http://www.grahamrobinsonsoftware.com' >Graham Robinson Software</a>
would become Graham Robinson Software
The routine below is what I came up with.
Hope it's useful to you.
' Console demo for RemoveHTMLLinksFromString: strips <a ...> / </a> tags from
' HTML fragments while keeping the text that was between them.
Module TestRegexp
    ''' <summary>
    ''' Runs the tag stripper over six sample fragments and prints one result
    ''' line per fragment. Matching is case-sensitive, so each fragment is
    ''' processed twice: once for "a" and once for "A".
    ''' </summary>
    Sub Main()
        Dim A As String = ":<a href='Hello Dolly'>Hello Dolly</a>:"
        Dim B As String = "<p>Graham</p>:<A href='Hello Dolly2'>Hello Dolly2</A>:<p>Graham2</p>"
        Dim C As String = ":<a href='Hello Dolly3'>Hello Dolly3</A>:"
        Dim D As String = ":<a href='Hello Dolly4'/>:"
        Dim E As String = ":<A href='feheh'>Hello Dolly 5</a>:"
        Dim F As String = ":<A href='feheh'>Hello Dolly 5</a><p>Graham</p>:<A href='Hello Dolly2'>Hello Dolly2</A>:<p>Graham2</p>:"

        For Each Sample As String In New String() {A, B, C, D, E, F}
            ' First pass removes lower-case tags, second pass the upper-case ones.
            Dim WithoutLower As String = RemoveHTMLLinksFromString(Sample, "a")
            Console.WriteLine(RemoveHTMLLinksFromString(WithoutLower, "A"))
        Next
    End Sub

    ''' <summary>
    ''' Removes every closing tag "&lt;/LinkName&gt;" and every opening tag that
    ''' starts with "&lt;LinkName " (up to and including the next "&gt;") from
    ''' InitialString, leaving all other text in place.
    ''' </summary>
    ''' <param name="InitialString">The HTML fragment to clean. Nothing or an
    ''' empty string is returned unchanged.</param>
    ''' <param name="LinkName">The tag name to strip, e.g. "a". Matching is
    ''' ordinal and case-sensitive.</param>
    ''' <returns>The fragment with the matching tags removed. An opening tag
    ''' that is never terminated by "&gt;" is left in the result as-is.</returns>
    Function RemoveHTMLLinksFromString(ByVal InitialString As String, ByVal LinkName As String) As String
        If String.IsNullOrEmpty(InitialString) Then
            Return InitialString
        End If

        Dim TargetString As String = InitialString
        Dim ClosingTag As String = "</" + LinkName + ">"
        Dim OpeningPrefix As String = "<" + LinkName + " "
        '
        ' Just get rid of all of the closing tags. The search is ordinal so that
        ' it agrees with String.Replace (which is always ordinal); a
        ' culture-sensitive IndexOf could report a match that Replace never
        ' removes, which would spin this loop forever.
        '
        While TargetString.IndexOf(ClosingTag, StringComparison.Ordinal) > -1
            TargetString = TargetString.Replace(ClosingTag, "")
        End While
        '
        ' Remove the actual links (the opening tags, attributes included)
        '
        Dim LinkIndex As Integer = TargetString.IndexOf(OpeningPrefix, StringComparison.Ordinal)
        While LinkIndex > -1
            Dim EndIndex As Integer = TargetString.IndexOf(">", LinkIndex + 1, StringComparison.Ordinal)
            If EndIndex < 0 Then
                ' Unterminated tag: there is no ">" anywhere after it, so nothing
                ' further can be removed. Stop instead of passing a negative
                ' count to Remove (exception) or removing nothing (endless loop).
                Exit While
            End If
            TargetString = TargetString.Remove(LinkIndex, (EndIndex - LinkIndex) + 1)
            ' Restart from the beginning: a removal can join text into a new match.
            LinkIndex = TargetString.IndexOf(OpeningPrefix, StringComparison.Ordinal)
        End While
        Return TargetString
    End Function
End Module
So <a href='http://www.grahamrobinsonsoftware.com' >Graham Robinson Software</a>
would become Graham Robinson Software
The routine below is what I came up with.
Hope it's useful to you.
' Console demo for RemoveHTMLLinksFromString: strips <a ...> / </a> tags from
' HTML fragments while keeping the text that was between them.
Module TestRegexp
    ''' <summary>
    ''' Runs the tag stripper over six sample fragments and prints one result
    ''' line per fragment. Matching is case-sensitive, so each fragment is
    ''' processed twice: once for "a" and once for "A".
    ''' </summary>
    Sub Main()
        Dim A As String = ":<a href='Hello Dolly'>Hello Dolly</a>:"
        Dim B As String = "<p>Graham</p>:<A href='Hello Dolly2'>Hello Dolly2</A>:<p>Graham2</p>"
        Dim C As String = ":<a href='Hello Dolly3'>Hello Dolly3</A>:"
        Dim D As String = ":<a href='Hello Dolly4'/>:"
        Dim E As String = ":<A href='feheh'>Hello Dolly 5</a>:"
        Dim F As String = ":<A href='feheh'>Hello Dolly 5</a><p>Graham</p>:<A href='Hello Dolly2'>Hello Dolly2</A>:<p>Graham2</p>:"

        For Each Sample As String In New String() {A, B, C, D, E, F}
            ' First pass removes lower-case tags, second pass the upper-case ones.
            Dim WithoutLower As String = RemoveHTMLLinksFromString(Sample, "a")
            Console.WriteLine(RemoveHTMLLinksFromString(WithoutLower, "A"))
        Next
    End Sub

    ''' <summary>
    ''' Removes every closing tag "&lt;/LinkName&gt;" and every opening tag that
    ''' starts with "&lt;LinkName " (up to and including the next "&gt;") from
    ''' InitialString, leaving all other text in place.
    ''' </summary>
    ''' <param name="InitialString">The HTML fragment to clean. Nothing or an
    ''' empty string is returned unchanged.</param>
    ''' <param name="LinkName">The tag name to strip, e.g. "a". Matching is
    ''' ordinal and case-sensitive.</param>
    ''' <returns>The fragment with the matching tags removed. An opening tag
    ''' that is never terminated by "&gt;" is left in the result as-is.</returns>
    Function RemoveHTMLLinksFromString(ByVal InitialString As String, ByVal LinkName As String) As String
        If String.IsNullOrEmpty(InitialString) Then
            Return InitialString
        End If

        Dim TargetString As String = InitialString
        Dim ClosingTag As String = "</" + LinkName + ">"
        Dim OpeningPrefix As String = "<" + LinkName + " "
        '
        ' Just get rid of all of the closing tags. The search is ordinal so that
        ' it agrees with String.Replace (which is always ordinal); a
        ' culture-sensitive IndexOf could report a match that Replace never
        ' removes, which would spin this loop forever.
        '
        While TargetString.IndexOf(ClosingTag, StringComparison.Ordinal) > -1
            TargetString = TargetString.Replace(ClosingTag, "")
        End While
        '
        ' Remove the actual links (the opening tags, attributes included)
        '
        Dim LinkIndex As Integer = TargetString.IndexOf(OpeningPrefix, StringComparison.Ordinal)
        While LinkIndex > -1
            Dim EndIndex As Integer = TargetString.IndexOf(">", LinkIndex + 1, StringComparison.Ordinal)
            If EndIndex < 0 Then
                ' Unterminated tag: there is no ">" anywhere after it, so nothing
                ' further can be removed. Stop instead of passing a negative
                ' count to Remove (exception) or removing nothing (endless loop).
                Exit While
            End If
            TargetString = TargetString.Remove(LinkIndex, (EndIndex - LinkIndex) + 1)
            ' Restart from the beginning: a removal can join text into a new match.
            LinkIndex = TargetString.IndexOf(OpeningPrefix, StringComparison.Ordinal)
        End While
        Return TargetString
    End Function
End Module
| < Prev | Next > |
|---|
