Bullracer2
12-09-2013, 07:01 PM
Hi I've spent hours trawling these forums and thought it time to ask for help!
Essentially I'm wanting to extract all hyperlinks from a few hundred word documents (of various sizes) into an excel file. The information i'm after is the document name, hyperlink text, and hyperlink address. I've setup 3 columns accordingly for this.
I think the closest i've got is from using some code macropod wrote however it does nothing in my situation. I believe the issue is around identifying the hyperlink and perhaps some other formatting i've used the .style = "hyperlink" to find. Also not sure how the extract and display the 3 pieces of information i require...
Sub GetHyperlinks()
Application.ScreenUpdating = False
Dim StrFolder As String, StrFile As String, StrFnd As String, StrTxt As String
Dim wdApp As Object, wdDoc As Object, bStrt As Boolean
Dim WkSht As Worksheet, LRow As Long, LCol As Long, i As Long
Const wdFindContinue As Long = 1, wdFindStop As Long = 0
Const wdReplaceOne As Long = 0, wdReplaceAll As Long = 2
StrFolder = GetFolder
If StrFolder = "" Then Exit Sub
Set WkSht = ThisWorkbook.Sheets("Sheet1")
LRow = WkSht.Cells.SpecialCells(xlCellTypeLastCell).Row
LCol = WkSht.Cells.SpecialCells(xlCellTypeLastCell).Column
' Test whether Word is already running.
On Error Resume Next
bStrt = False ' Flag to record if we start Word, so we can close it later.
Set wdApp = GetObject(, "Word.Application")
'Start Excel if it isn't running
If wdApp Is Nothing Then
Set wdApp = CreateObject("Word.Application")
If wdApp Is Nothing Then
MsgBox "Can't start Word.", vbExclamation
Exit Sub
End If
' Record that we've started Word.
bStrt = True
End If
On Error GoTo 0
StrFile = Dir(StrFolder & "\*.doc", vbNormal)
While StrFile <> ""
LRow = LRow + 1
Set wdDoc = wdApp.Documents.Open(Filename:=StrFolder & "\" & StrFile, AddToRecentFiles:=False, Visible:=False, ReadOnly:=True)
'Do some pre-processing cleanup
With wdDoc
'Get the data for each defined Excel column
For i = 1 To LCol
StrFnd = WkSht.Cells(1, i).Value
With .Range
With .Find
.ClearFormatting
.Style = "Hyperlink"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchWildcards = True
.Execute
End With
If .Find.Found = True Then
'Parse the data
StrTxt = .Duplicate.Text
'Update Excel
WkSht.Cells(LRow, i).Value = StrTxt
End If
End With
Next
.Close SaveChanges:=False
End With
StrFile = Dir()
Wend
If bStrt = True Then wdApp.Quit
Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing
Application.ScreenUpdating = True
End Sub
Thanks
Essentially I'm wanting to extract all hyperlinks from a few hundred word documents (of various sizes) into an excel file. The information i'm after is the document name, hyperlink text, and hyperlink address. I've setup 3 columns accordingly for this.
I think the closest i've got is from using some code macropod wrote however it does nothing in my situation. I believe the issue is around identifying the hyperlink and perhaps some other formatting i've used the .style = "hyperlink" to find. Also not sure how the extract and display the 3 pieces of information i require...
Sub GetHyperlinks()
Application.ScreenUpdating = False
Dim StrFolder As String, StrFile As String, StrFnd As String, StrTxt As String
Dim wdApp As Object, wdDoc As Object, bStrt As Boolean
Dim WkSht As Worksheet, LRow As Long, LCol As Long, i As Long
Const wdFindContinue As Long = 1, wdFindStop As Long = 0
Const wdReplaceOne As Long = 0, wdReplaceAll As Long = 2
StrFolder = GetFolder
If StrFolder = "" Then Exit Sub
Set WkSht = ThisWorkbook.Sheets("Sheet1")
LRow = WkSht.Cells.SpecialCells(xlCellTypeLastCell).Row
LCol = WkSht.Cells.SpecialCells(xlCellTypeLastCell).Column
' Test whether Word is already running.
On Error Resume Next
bStrt = False ' Flag to record if we start Word, so we can close it later.
Set wdApp = GetObject(, "Word.Application")
'Start Excel if it isn't running
If wdApp Is Nothing Then
Set wdApp = CreateObject("Word.Application")
If wdApp Is Nothing Then
MsgBox "Can't start Word.", vbExclamation
Exit Sub
End If
' Record that we've started Word.
bStrt = True
End If
On Error GoTo 0
StrFile = Dir(StrFolder & "\*.doc", vbNormal)
While StrFile <> ""
LRow = LRow + 1
Set wdDoc = wdApp.Documents.Open(Filename:=StrFolder & "\" & StrFile, AddToRecentFiles:=False, Visible:=False, ReadOnly:=True)
'Do some pre-processing cleanup
With wdDoc
'Get the data for each defined Excel column
For i = 1 To LCol
StrFnd = WkSht.Cells(1, i).Value
With .Range
With .Find
.ClearFormatting
.Style = "Hyperlink"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchWildcards = True
.Execute
End With
If .Find.Found = True Then
'Parse the data
StrTxt = .Duplicate.Text
'Update Excel
WkSht.Cells(LRow, i).Value = StrTxt
End If
End With
Next
.Close SaveChanges:=False
End With
StrFile = Dir()
Wend
If bStrt = True Then wdApp.Quit
Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing
Application.ScreenUpdating = True
End Sub
Thanks