excel macro search for word and copy sentence - macos

I hope someone can help me with this problem.
I have two documents, one is Word and one is Excel. In the word file I have a list of items, for example:
Title Subtitle
1. Name
Address:
Phone number:
2. Name
Address:
Phone number:
3. Name
Address:
Phone number:
In the excel file I have a list of words in column D. What I want to do is take the word from column D, search for it in the Word document and then copy the sentence from after "Address: " to the ".", put that in Column C (i.e., one cell to the left), and then copy the sentence from after "Phone number: " to the "." and put it in Column B.
One of the parts I can't really wrap my head around is going from the first set of name, address and phone number to the next set.
Can someone help me with the macro on how to do this?
I have thought of expanding it from this:
Sub wordSearch()
' Purpose: display the text between (but not including)
' the words "Title" and "Address" if they both appear.
Dim rng1 As Range
Dim rng2 As Range
Dim strTheText As String
Set rng1 = ActiveDocument.Range
If rng1.Find.Execute(FindText:="Example:") Then
Set rng2 = ActiveDocument.Range(rng1.End, ActiveDocument.Range.End)
If rng2.Find.Execute(FindText:=".") Then
strTheText = ActiveDocument.Range(rng1.End, rng2.Start).Text
MsgBox strTheText
End If
End If
End Sub

As an example, the sub for Excel below gets whole text from catalog.doc located in the same folder as Excel file, parses text with RegExp, loops through contacts and puts it into the Dictionary, then loops through D2:D10 cells and assign appropriate data for matched names to C and B columns respectively. Tested in MS Office 2003, Windows 7 HB.
Option Explicit
Sub GetFromWord()
' Tools - References - add these:
' Microsoft Word 11.0 Object Library
' Microsoft VBScript Regular Expressions 5.5
' Microsoft Scripting Runtime
Dim strCont As String
Dim objCatalog As Scripting.Dictionary
Dim objMatch As IMatch2
Dim objElt As Range
With New Word.Application
.Documents.Open ThisWorkbook.Path & "\catalog.doc"
With .ActiveDocument.Range
.WholeStory
strCont = .Text
End With
.Quit
End With
Set objCatalog = New Scripting.Dictionary
With New RegExp
.Pattern = "\d+\.[ \t]*([^\n\r]*)\s*Address:[ \t]*([^\n\r]*)\s*Phone number:[ \t]*([^\n\r]*)\s*"
.Global = True
.MultiLine = True
.IgnoreCase = True
For Each objMatch In .Execute(strCont)
objCatalog.Add objMatch.SubMatches(0), Array(objMatch.SubMatches(1), objMatch.SubMatches(2))
Next
End With
For Each objElt In Range("D2:D10")
With objElt
If objCatalog.Exists(.Cells(1, 1).Value) Then
.Offset(0, -1) = objCatalog(.Cells(1, 1).Value)(0)
.Offset(0, -2) = objCatalog(.Cells(1, 1).Value)(1)
End If
End With
Next
End Sub
Note, that duplicated contacts in Word will result in error, no additional check implemented.
UPD: In case of any problems with early binding you can use late binding CreateObject(ProgID) as follows, but it isn't a best practice in VBA:
Option Explicit
Sub GetFromWordLBind()
Dim strCont As String
Dim objCatalog, objMatch, objElt As Object
With CreateObject("Word.Application")
.Documents.Open ThisWorkbook.Path & "\catalog.docx"
With .ActiveDocument.Range
.WholeStory
strCont = .Text
End With
.Quit
End With
Set objCatalog = CreateObject("Scripting.Dictionary")
With CreateObject("VBScript.RegExp")
.Pattern = "\d+\.[ \t]*([^\n\r]*)\s*Address:[ \t]*([^\n\r]*)\s*Phone number:[ \t]*([^\n\r]*)\s*"
.Global = True
.MultiLine = True
.IgnoreCase = True
For Each objMatch In .Execute(strCont)
objCatalog.Add objMatch.SubMatches(0), Array(objMatch.SubMatches(1), objMatch.SubMatches(2))
Next
End With
For Each objElt In Range("D2:D10")
With objElt
If objCatalog.Exists(.Cells(1, 1).Value) Then
.Offset(0, -1) = objCatalog(.Cells(1, 1).Value)(0)
.Offset(0, -2) = objCatalog(.Cells(1, 1).Value)(1)
End If
End With
Next
End Sub

Related

Macro to extract references out of a word document

I am a researcher where I have to work frequently with research papers containing references in following formats embedded within text
Group 1
(Sajid, 2021); or
(Sajid and Aqeel, 2021); or
(Sajid et al., 2021)
or
Group 2
Sajid (2021); or
Sajid and Aqeel (2021); or
Sajid et al. (2021)
I have a following macro that works well with first group 1 but it does not works with Group 2
Sub ExtractRefsFromSelection()
MsgBox ("This macro extracts references from selected text.")
Dim SearchRange As range, DestinationDoc$, SourceDoc$
DestinationDoc$ = "Refs.doc"
SourceDoc$ = ActiveDocument.Name
Documents.Add DocumentType:=wdNewBlankDocument
ActiveDocument.SaveAs DestinationDoc$, wdFormatDocument
Documents(SourceDoc$).Activate
Set SearchRange = ActiveDocument.range
With SearchRange.Find
.ClearFormatting
.Text = "\([!\)]#[0-9]{4}\)"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
While .Execute
Documents(DestinationDoc$).range.Text = Documents(DestinationDoc$).range.Text + SearchRange.Text
Wend
End With
End Sub
I have tried amending the following line
.Text = "\([!\)]#[0-9]{4}\)"
to
.Text = "[!\)]\(#[0-9]{4}\)"
but it extracts year only like (2021) only but not its authors of citations in Group 2
any help?
If the documents you're working with have used Word's referencing tools, you can replicate the references then insert the full details of each citation, with the author/editor details. For example:
Sub Demo()
Application.ScreenUpdating = False
Dim DocSrc As Document, DocTgt As Document, Fld As Field, i As Long
Set DocSrc = ActiveDocument: Set DocTgt = Documents.Add
With DocSrc
With .Bibliography
For i = 1 To .Sources.Count
DocTgt.Bibliography.Sources.Add .Sources(i).XML
Next
End With
For Each Fld In .Fields
With Fld
If .Type = wdFieldCitation Then
With DocTgt
.Range.InsertAfter vbCr
.Fields.Add .Paragraphs.Last.Range, wdFieldEmpty, Replace(Fld.Code.Text, "\n", ""), False
End With
End If
End With
Next
End With
Application.ScreenUpdating = True
End Sub

Improve Looping Efficiency in VBA

I have a For loop that loops through integers 1 to 9 and simply finds the bottom most entry that corresponds to that integer ( i.e. 1,1,1,2,3,4,5 would find the 3rd "1" entry) and inserts a blank row. I concatenate the number with a string "FN" that just corresponds to the application for this code, just to clarify. Anyway, it works well, but it lags quite a bit for only having to run through 9 integers. I was hoping someone would be able to help me debug to improve speed on this code. Thanks!
Bonus points if anyone can shed some light on a good way to populate the blank row being inserted with a formatted copy of the header of the page that spans ("A1:L1"). The code I attempted is commented out right before Next i.
Sub test()
Dim i As Integer, Line As String, Cards As Range
Dim Head As Range, LR2 As Long
For i = 1 To 9
Line = "FN" & CStr(i)
Set Cards = Sheets(1).Cells.Find(Line, after:=Cells(1, 1), searchdirection:=xlPrevious)
Cards.Rows.Offset(1).EntireRow.Insert
Cards.Offset(1).EntireRow.Select
' Range("A" & (ActiveCell.Row), "K" & (ActiveCell.Row)) = Range("A3:K3")
' Range("A" & (ActiveCell.Row), "K" & (ActiveCell.Row)).Font.Background = Range("A3:K3").Font.Background
Next i
End Sub
This works pretty fast for me
Sub Sample()
Dim i As Long, line As String, Cards As Range
With Sheets(1)
For i = 1 To 9
line = "FN" & i
Set Cards = .Columns(6).Find(line, LookIn:=xlValues, lookat:=xlWhole)
If Not Cards Is Nothing Then
.Range("A3:K3").Copy
Cards.Offset(1, -5).Insert Shift:=xlDown
End If
Next i
End With
End Sub
Before
After
Most of your improvements will come from altering the application environment variables with the appTGGL helper function but there are a few tweaks in the base code here.
Option Explicit
Sub ewrety()
Dim f As Long, fn0 As String, fndfn As Range
'appTGGL btggl:=false 'uncomment this when you are confident in it
With Worksheets(1).Columns("F")
For f = 1 To 9
fn0 = Format$(f, "\F\N0")
Set fndfn = .Find(What:=fn0, After:=.Cells(1), LookIn:=xlFormulas, LookAt:=xlWhole, _
SearchOrder:=xlByRows, SearchDirection:=xlPrevious, MatchCase:=False)
With fndfn
.Offset(1, -5).EntireRow.Insert Shift:=xlDown
.Parent.Range("A1:L1, XFC1").Copy Destination:=.Offset(1, -5)
End With
Next f
End With
appTGGL
End Sub
Public Sub appTGGL(Optional bTGGL As Boolean = True)
With Application
.ScreenUpdating = bTGGL
.EnableEvents = bTGGL
.DisplayAlerts = bTGGL
.AutoRecover.Enabled = bTGGL 'no interruptions with an auto-save
.Calculation = IIf(bTGGL, xlCalculationAutomatic, xlCalculationManual)
.CutCopyMode = False
.StatusBar = vbNullString
End With
Debug.Print Timer
End Sub

VBScript Replace specific value with regex and modify text file

I know there are a lot questions similar to this one but i couldn't find the right answer for me. I need to replace all phrases in xml file that starts and ends with % (e.g. %TEST% or %TEST-NEW% )
So far i have these tryouts:
This was my test one that works in the console but has only 1 line of string
zone = "<test>%TEST%</test>"
MsgBox zone
'Setting the regex and cheking the matches
set regex = New RegExp
regex.IgnoreCase = True
regex.Global = True
regex.Pattern = "%.+%"
Set myMatches = regex.execute(zone)
For each myMatch in myMatches
Wscript.echo myMatch
result = Replace(zone,myMatch,"")
next
MsgBox result
but when i try to do the same from a file with this...
Dim objStream, strData, fields
Set objStream = CreateObject("ADODB.Stream")
objStream.CharSet = "utf-8"
objStream.Open
objStream.LoadFromFile("C:\test\test.xml")
strData = objStream.ReadText()
Wscript.echo strData
set regex = New RegExp
regex.IgnoreCase = True
regex.Global = True
regex.Pattern = "%.+%"
Set myMatches = regex.execute(strData)
For each myMatch in myMatches
Wscript.echo myMatch
result = Replace(strData,myMatch,"")
next
Wscript.echo result
...the first echo returns correctly the contains of the file and then the second echo in the loop echoes all the matches that i need to replace , but the last echo return the same result as the first (nothing is being replaced)
The xml looks like this (just for example):
<script>%TEST%</script>
<value>%VALUE%</value>
<test>%TEST%</test>
P.S. I need to loop through xml files in a specific folder and replace the phrase from above. Can anyone help?
The final script that works for me(big thanks to Tomalak):
Option Explicit
Dim path, doc, node, placeholder,srcFolder,FSO,FLD,fil
Set placeholder = New RegExp
placeholder.Pattern = "%[^%]+%"
placeholder.Global = True
srcFolder = "C:\test"
Set FSO = CreateObject("Scripting.FileSystemObject")
Set FLD = FSO.GetFolder(srcFolder)
For each fil In FLD.Files
if LCase(FSO.GetExtensionName(fil.Name)) = "xml" Then
path = "C:\test\" & fil.Name
' 1. parse the XML into a DOM
Set doc = LoadXmlDoc(path)
' 2. select and modify DOM nodes
For Each node In doc.selectNodes("//text()|//#*")
node.nodeValue = SubstitutePlaceholders(node.nodeValue)
Next
' 3. save modified DOM back to file
doc.save path
End If
Next
' --------------------------------------------------------------------------
Function LoadXmlDoc(path)
Set LoadXmlDoc = CreateObject("MSXML2.DomDocument.6.0")
LoadXmlDoc.async = False
LoadXmlDoc.load path
If LoadXmlDoc.parseError.errorCode <> 0 Then
WScript.Echo "Error in XML file."
WScript.Echo LoadXmlDoc.parseError.reason
WScript.Quit 1
End If
End Function
' --------------------------------------------------------------------------
Function SubstitutePlaceholders(text)
Dim match
For Each match In placeholder.Execute(text)
text = Replace(text, match, GetReplacement(match))
Next
SubstitutePlaceholders = text
End Function
' --------------------------------------------------------------------------
Function GetReplacement(placeholder)
Select Case placeholder
Case "%TEST%": GetReplacement = "new value"
Case "%BLA%": GetReplacement = "other new value"
Case Else: GetReplacement = placeholder
End Select
End Function
' --------------------------------------------------------------------------
Never use regular expressions on XML files, period.
Use an XML parser. It will be simpler, the code will be easier to read, and most importantly: It will not break the XML.
Here is how to modify your XML document in the proper way.
Option Explicit
Dim path, doc, node, placeholder
Set placeholder = New RegExp
placeholder.Pattern = "%[^%]+%"
placeholder.Global = True
path = "C:\path\to\your.xml"
' 1. parse the XML into a DOM
Set doc = LoadXmlDoc(path)
' 2. select and modify DOM nodes
For Each node In doc.selectNodes("//text()|//#*")
node.nodeValue = SubstitutePlaceholders(node.nodeValue)
Next
' 3. save modified DOM back to file
doc.save path
' --------------------------------------------------------------------------
Function LoadXmlDoc(path)
Set LoadXmlDoc = CreateObject("MSXML2.DomDocument.6.0")
LoadXmlDoc.async = False
LoadXmlDoc.load path
If LoadXmlDoc.parseError.errorCode <> 0 Then
WScript.Echo "Error in XML file."
WScript.Echo LoadXmlDoc.parseError.reason
WScript.Quit 1
End If
End Function
' --------------------------------------------------------------------------
Function SubstitutePlaceholders(text)
Dim match
For Each match In placeholder.Execute(text)
text = Replace(text, match, GetReplacement(match))
Next
SubstitutePlaceholders = text
End Function
' --------------------------------------------------------------------------
Function GetReplacement(placeholder)
Select Case placeholder
Case "%TEST%": GetReplacement = "new value"
Case "%BLA%": GetReplacement = "other new value"
Case Else: GetReplacement = placeholder
End Select
End Function
' --------------------------------------------------------------------------
The XPath expression //text()|//#* targets all text nodes and all attribute nodes. Use a different XPath expression if necessary. (I will not cover XPath basics here, there are plenty of resources for learning it.)
Of course this solution uses regular expressions, but it does that on the text values that the XML structure contains, not on the XML structure itself. That's a crucial difference.

How can run the following code on multiple Excel sheets?

I have a code which I would like to use on multiple sheets, except one sheet. But applying the code to alle sheets is also fine.
Here is the code that I would like to adjust. I am have currently applied it to Excel 2011 in OS X , but I would like to use it for Excel 2010 in Windows.
Private Sub Worksheet_Change(ByVal Target As Range)
If Target.Address = "$A$1" Then
Dim the_selection As String
Dim month_in_review As String
the_selection = Sheet1.Range("A1")
Dim Rep As Integer
For Rep = 2 To 379
the_column = GetColumnLetter_ByInteger(Rep)
month_in_review = Sheet1.Range(the_column & "1")
If the_selection = month_in_review Then
Sheet1.Range(the_column & ":" & the_column).EntireColumn.Hidden = False
Else
Sheet1.Range(the_column & ":" & the_column).EntireColumn.Hidden = True
End If
Next Rep
End If
End Sub
In the module I have the following code:
Public Function GetColumnLetter_ByInteger(what_number As Integer) As String
GetColumnLetter_ByInteger = ""
MyColumn_Integer = what_number
If MyColumn_Ineger <= 26 Then
column_letter = ChrW(64 + MyColumn_Integer)
End If
If MyColumn_Integer > 26 Then
column_letter = ChrW(Int((MyColumn_Integer - 1) / 26) + 64) & ChrW(((MyColumn_Integer - 1) Mod 26) + 65)
End If
GetColumnLetter_ByInteger = column_letter
End Function
If you're asking for one sheet to detect the change in cell "A1" and then to hide/unhide columns on multiple sheets then the prior answers to your question will serve you nicely.
If, on the other hand, you're asking to detect a change in cell "A1" on any sheet and then to hide/unhide columns on just the changed sheet, then the code below will work for you. It accesses the Workbook_SheetChanged event at Workbook level.
A few points about your code:
You can reference cells using their integer or address values with the .Cell property, so Sheet1.Cells(1, 1) is the same as Sheet1.Cells(1, "A"). The same applies to the .Columns property. So there's no real need to convert your integer values to a string. See #Florent B's answer for a good example of this.
Wherever possible, minimise looping sheet interactions as these are very time-consuming. So rather than loop through the columns and hide/unhide each one individually, you could assign them to ranges within your loop and then hide/unhide the ranges all in one go at the end of your loop. If you must interact with the sheet on each iteration of your loop, then set the Application.ScreenUpdating property to false before the start of your loop. There's an example of this property in the sample code below.
Put this in your Workbook module:
Private Sub Workbook_SheetChange(ByVal Sh As Object, ByVal Target As Range)
Const TARGET_ADDRESS As String = "A1"
Dim cell As Range
Dim hiddenCols As Range
Dim unhiddenCols As Range
Dim selectedMonth As String
Dim monthInReview As String
Dim c As Integer
'Ignore event if not a target worksheet
If Sh.Name = "Not Wanted" Then Exit Sub
'Ignore event if not in target range
Set cell = Target.Cells(1)
If cell.Address(False, False) <> TARGET_ADDRESS Then Exit Sub
'Criteria met, so handle event
selectedMonth = CStr(cell.Value)
For c = 2 To 379
Set cell = Sh.Cells(1, c)
monthInReview = CStr(cell.Value)
'Add cell to hidden or unhidden ranges
If monthInReview = selectedMonth Then
If unhiddenCols Is Nothing Then
Set unhiddenCols = cell
Else
Set unhiddenCols = Union(unhiddenCols, cell)
End If
Else
If hiddenCols Is Nothing Then
Set hiddenCols = cell
Else
Set hiddenCols = Union(hiddenCols, cell)
End If
End If
Next
'Hide and unhide the cells
Application.ScreenUpdating = False 'not really needed here but given as example
If Not unhiddenCols Is Nothing Then
unhiddenCols.EntireColumn.Hidden = False
End If
If Not hiddenCols Is Nothing Then
hiddenCols.EntireColumn.Hidden = True
End If
Application.ScreenUpdating = True
End Sub
You can use a for each loop to loop through all the Worksheets, and check the worksheet name if it should be skipped. Then apply your code onto the sheet selected.
Something like:
Sub Worksheet_Change(ByVal Target As Range)
If Target.Address = "$A$1" Then
Dim ws As Worksheet
For Each ws In ActiveWorkbook.Worksheets
If ws.Name <> "Skip Sheet" Then
Dim the_selection As String
Dim month_in_review As String
the_selection = ws.Range("A1")
Dim Rep As Integer
For Rep = 2 To 379
the_column = GetColumnLetter_ByInteger(Rep)
month_in_review = ws.Range(the_column & "1")
If the_selection = month_in_review Then
ws.Range(the_column & ":" & the_column).EntireColumn.Hidden = False
Else
ws.Range(the_column & ":" & the_column).EntireColumn.Hidden = True
End If
Next Rep
End If
Next ws
End If
End Sub
I wasn't entirely sure what you wished to achieve, so i put ws in the place of Sheet1.
This example will show/hide the columns in all the other sheets if the first cell of the column match/differ with the cell A1 of the sheet where this code is placed:
Private Sub Worksheet_Change(ByVal Target As Range)
' exit if not cell A1
If Target.row <> 1 Or Target.column <> 1 Then Exit Sub
Dim sheet As Worksheet
Dim the_selection As String
Dim month_in_review As String
Dim column As Integer
the_selection = Target.Value
' iterate all the sheets
For Each sheet In ThisWorkbook.Worksheets
' skip this sheet
If Not sheet Is Me Then
' iterate the columns
For column = 2 To 379
' get the first cell of the column
month_in_review = sheet.Cells(1, column).Value
' hide or show the column if it's a match or not
sheet.Columns(column).Hidden = month_in_review <> the_selection
Next
End If
Next
End Sub

Compile error: searching for one specific data point, looping through whole workbook, copy/paste data

Excel VBA beginner coming back for more. I am creating a macro that does the following two things:
1) Searches through multiple worksheets in a single workbook for a specific piece of data (a name), variable A below
2) If that name appears, to copy a specific range of cells from the worksheet (variable X below) to the master file (variable B below)
Sub Pull_X_Click()
Dim A As Variant 'defines name
Dim B As Workbook 'defines destination file
Dim X As Workbook 'defines existing report file as source
Dim Destination As Range 'defines destination for data pulled from report
Dim ws As Worksheet
Dim rng As Range
A = Workbooks("B.xlsm").Worksheets("Summary").Range("A1").Value
Set B = Workbooks("B.xlsm")
Set X = Workbooks.Open("X.xlsm")
Set Destination = Workbooks("B").Worksheets("Input").Range("B2:S2")
'check if name is entered properly
If A = "" Then
MsgBox ("Your name is not visible; please start from the Reference tab.")
Worksheets("Reference").Activate
Exit Sub
End If
X.Activate
For Each ws In X.Worksheets
Set rng = ws.Range("A" & ws.Rows.Count).End(xlUp)
If InStr(1, rng, A) = 0 Then
Else
X.ActiveSheet.Range("$A$2:$DQ$11").AutoFilter Field:=1, Criteria1:=A
Range("A7:CD7").Select
Selection.Copy
Destination.Activate
Destination.PasteSpecial
End If
Next ws
Application.ScreenUpdating = False
End Sub
UPDATE: I managed to resolve the previous compile error, and it seems that the code (should?) work. However, it gets to this step:
X.Activate
...and then nothing happens. There's no run-time errors or anything, but it doesn't seem to be searching through the file (variable X) or pulling any of the data based on the presence of variable A. Any thoughts?
What I would've done is loop through the rows and evaluate the column in which the necessary data appears and then avoiding copy/paste just make the target range equal to the source range:
Sub SearchNCopy()
Dim A As String 'The String you are searching for
Dim b As String ' the string where you shall be searching
Dim wbs, wbt As Workbook ' Declare your workbooks
Dim wss As Worksheet
Dim i, lrow As Integer
Set wbt = Workbooks("B.xlsm") 'Set your workbooks
Set wbs = Workbooks.Open("X.xlsm")
A = wbt.Worksheets("Summary").Range("A1").Value
If A = "" Then
MsgBox ("Your name is not visible; please start from the Reference tab.")
Worksheets("Reference").Activate
Exit Sub
End If
For Each wss In wbs.Worksheets 'Loop through sheets
lrow = wss.Cells(wss.Rows.Count, "A").End(xlUp).Row 'Find last used row in each sheet - MAKE SURE YOUR SHEETS DONT HAVE BLANKS BETWEEN ENTIRES
For i = 1 To lrow Step 1 'Loop through the rows
b = wss.Range("A" & i).Value 'Assign the value to the variable from column a of the row
If Not InStr(1, b, A) = 0 Then 'Evaluate the value in the column a and if it contains the input string, do the following
wbt.Worksheets("Input").Range("B2:CC2") = wss.Range("A" & i & ":CD" & i) 'copies the range from one worksheet to another avoiding copy/paste (much faster)
End If
Next i
Next wss
End Sub

Resources