UPDATE: This script no longer works. The URL path to the TRs has changed and is now effectively random, so I don't think it can be easily fixed. It was fun whilst it lasted!
A minor update of the NetApp Technical Report (TR) PowerShell Collector
Script: Version 2. I've been doing a bit of blog housekeeping and have moved
the NetApp TR catalogue into dedicated pages, with links to these pages in
the sidebar. The script is updated to get the PDF titles from the new pages,
and it now supports multiple source pages rather than one big one. The new
pages have the HTML links to the TRs.
Image: Pages
Sidebar (standard/non-mobile view)
The Script
##########################################
## NetAppTR_Collector.ps1 (Version 2.1) ##
##########################################
# This program will collect all the TRs from -
#   https://www.netapp.com/us/media/tr-XXXX.pdf
# - where XXXX is specified by $RangeStart to $RangeEnd
#
# Version 2 adds:
# 1) Checks for file existence on the TR-XXXX part (not the whole file name)
# 2) Automatically names the PDFs from
#    http://www.cosonok.com/2017/07/netapp-technical-reports-catalogue.html
# 3) Logs updated files and new files (as well as dead links - the log
#    filename has been changed)
#
# Version 2.1:
# 1) PDF names now come from:
#    http://www.cosonok.com/p/netapp-trs-4500-4999.html
#    http://www.cosonok.com/p/netapp-trs-4000-4499.html
Param(
  # First TR number (the XXXX in tr-XXXX.pdf) to process.
  [Int]    $RangeStart     = 4000,

  # Last TR number to process (inclusive).
  [Int]    $RangeEnd       = 4700,

  # Folder that the PDFs and the log file are written to.
  [String] $DownloadFolder = "C:\Downloads\NetAppTRs\",

  # Name of the log file; it is created inside $DownloadFolder.
  [String] $LogFile        = "NetAppTR_Collector.log"
)
# Make sure the download folder path ends with a backslash:
If($DownloadFolder -notmatch '.+?\\$'){
  $DownloadFolder = $DownloadFolder + "\"
}

# Full path of the log file (folder path followed by the log file name):
[String]$LogFilePath = "$DownloadFolder$LogFile"

# Catalogue pages that the PDF titles are read from:
[System.Array]$CatalogURLs = @(
  "http://www.cosonok.com/p/netapp-trs-4500-4999.html",
  "http://www.cosonok.com/p/netapp-trs-4000-4499.html"
)
# Generic display function:
# Writes $P to the console in the colour named by $I (WHITE when omitted).
Function Wr{
  Param(
    $P,
    $I = "WHITE"
  )
  Write-Host -Object $P -ForegroundColor $I
}
##########################
## TITLE EXTRACTOR CODE ##
##########################

# Download every catalogue page listed in $CatalogURLs and keep the responses
# in $URLdata; Get-Title later reads the .Content of each response.
# The script pauses and exits as soon as one page cannot be fetched.
[System.Array]$URLdata = @()
Foreach($URL in $CatalogURLs){
  Try{
    # -UseBasicParsing: only the raw .Content is used, so the Internet
    # Explorer based DOM parsing of Windows PowerShell is not needed (and is
    # not always available, e.g. on Server Core or before IE's first run).
    $WebData = Invoke-WebRequest $URL -Method Get -UseBasicParsing -ErrorAction Stop
  }
  Catch{
    Wr "FAILED: Unable to acquire catalog from $URL!" RED;PAUSE;EXIT
  }
  $URLdata += $WebData
}
## Get-Title function:
## Looks up the catalogue title for one TR number in the pages held in $URLdata.
##
## Parameters:
##   -Number  TR number to look up. Defaults to the caller's $j, so existing
##            calls without arguments keep working.
## Returns:
##   The text between the first ">" of the matching line and the next "<".
##   A title that wraps over several lines is joined with single spaces.
##   "TR-<Number>" is returned when no catalogue line mentions the TR.
## NOTE(review): taking the text after the first ">" assumes the link tag is
## the first tag on the matching line - confirm against the catalogue pages.
Function Get-Title{
  Param([String]$Number = $j)
  Foreach($WebPage in $URLdata){
    [System.Array]$Lines = $WebPage.Content.Split("`n")
    $Count = $Lines.Count
    For($k=0; $k -lt $Count; $k++){
      If($Lines[$k].Contains("href") -and $Lines[$k].Contains("TR-$($Number):")){
        [String]$Title = $Lines[$k].Split(">")[1]
        $Next = $k + 1
        # Keep appending following lines until the closing tag shows up.
        # The bound is checked BEFORE reading a line, so a title that ends
        # on the very last line of the page is still accepted.
        While(-not $Title.Contains("<")){
          If($Next -ge $Count){
            Wr "FAILED: Something went wrong here!" RED;PAUSE;EXIT
          }
          $Title += " "
          $Title += $Lines[$Next]
          $Next++
        }
        RETURN $Title.Split("<")[0]
      }
    }
  }
  RETURN "TR-$($Number)"
}
## Create hashtable of titles to number XXXX:
## Key   = TR number as a string (e.g. "4500")
## Value = title made safe for use as a file name (without the .pdf extension)
[System.Object]$Titles = @{}
$InvalidChars = [System.IO.Path]::GetInvalidFileNameChars()
For($j=$RangeEnd; $j -ge $RangeStart; $j--){
  # Get-Title reads the loop variable $j to know which TR to look up.
  [String]$Name = (Get-Title).Replace(":"," -").Replace("/",",")
  # Titles that wrap over several catalogue lines can carry CR/LF and runs of
  # spaces; collapse them so the file name stays on one tidy line.
  $Name = ($Name -replace '\s+',' ').Trim()
  # Any remaining character that is not allowed in a file name would make
  # the later rename/download fail, so swap it for an underscore.
  Foreach($Char in $InvalidChars){
    $Name = $Name.Replace([String]$Char,"_")
  }
  # Never store an empty title (it would produce a file called ".pdf").
  If(-not $Name){ $Name = "TR-$($j)" }
  $Titles."$j" = $Name
}
#################################
## PREPARE DOWNLOAD FOLDER/LOG ##
#################################

# Create the download folder (-Force: no error if it already exists) and
# stop if it still cannot be found afterwards.
New-Item -Path $DownloadFolder -ItemType Directory -Force | Out-Null
If(-not (Test-Path $DownloadFolder)){
  Wr "FAILED: Test-Path $DownloadFolder" RED;PAUSE;EXIT
}

# An existing log is copied to <log>.bak; otherwise an empty log is created.
If(Test-Path $LogFilePath){
  Copy-Item $LogFilePath "$($LogFilePath).bak"
}else{
  New-Item $LogFilePath -ItemType File -Force | Out-Null
}
######################
## PDF RENAMER CODE ##
######################

# Rename items in the download folder whose name starts with TR-XXXX so that
# they carry the title currently held in $Titles for that number.
Get-ChildItem -LiteralPath $DownloadFolder | Foreach{
  $CIname = $_.Name
  # -match is case-insensitive. Requiring four digits after "TR-" avoids the
  # exception Substring(0,4) raised for names with a shorter second part.
  If($CIname -match '^TR-([0-9]{4})'){
    [String]$TRnumber = $Matches[1]
    If($Titles.$TRnumber){
      $NewName = $Titles.$TRnumber + ".pdf"
      If($CIname -ne $NewName){
        If(Test-Path -LiteralPath ($DownloadFolder + $NewName)){
          # Renaming onto an existing file would fail; leave both in place.
          Wr "Skipping rename: $($DownloadFolder)$($NewName) already exists" YELLOW
        }else{
          Wr "Renaming item: $($DownloadFolder)$($CIname) to $($Titles.$TRnumber).pdf"
          # -LiteralPath: file names may contain [ or ], which -Path would
          # interpret as wildcard characters.
          Rename-Item -LiteralPath ($DownloadFolder + $CIname) -NewName $NewName
        }
      }
    }
  }
}
#######################
## TR COLLECTOR CODE ##
#######################

## Download the TRs:
## For every number in $RangeStart..$RangeEnd a HEAD request is sent. A PDF
## is downloaded when it is not on disk yet or when the server copy is newer
## than the saved one. Dead links, new and updated files are written to the log.
Import-Module BitsTransfer

For($i=$RangeStart; $i -le $RangeEnd; $i++){
  $Title    = "tr-$i"
  $WebUrl   = "https://www.netapp.com/us/media/$Title.pdf"
  $SavePath = ($DownloadFolder + $Titles."$i" + ".pdf")

  ## Get the header:
  $hdr = $NULL
  Try{$hdr = Invoke-WebRequest $WebUrl -Method Head -UseBasicParsing}
  Catch{"Dead link: $WebUrl" | Out-File $LogFilePath -Append}

  ## If we have a header:
  If($hdr){
    ## Only download updated versions:
    $Download = $FALSE
    # -LiteralPath: titles may contain [ or ], which would otherwise be
    # treated as wildcard characters.
    If(Test-Path -LiteralPath $SavePath){
      $SavedLRT = [DateTime]((Get-Item -LiteralPath $SavePath).LastWriteTime)

      # Reset on every pass so a value from an earlier TR is never reused.
      # @(...)[0] copes with the header being a single string, an array of
      # strings, or missing altogether.
      $WebFileLRT   = $NULL
      $LastModified = @($hdr.headers."last-modified")[0]
      If($LastModified){
        Try{$WebFileLRT = [DateTime]$LastModified}
        Catch{$WebFileLRT = $NULL}
      }

      # Without a usable timestamp the saved copy cannot be proven current,
      # so it is fetched again.
      If(($NULL -eq $WebFileLRT) -or ($WebFileLRT.Ticks -gt $SavedLRT.Ticks)){
        Wr "UPDATED   : $Title" GREEN
        "Updated : $Title" | Out-File $LogFilePath -Append
        $Download = $TRUE
      }else{
        Wr "NO UPD.   : $Title"
      }
    }else{
      Wr "NEW D/LOAD: $Title" GREEN
      "New D/L : $Title" | Out-File $LogFilePath -Append
      $Download = $TRUE
    }

    ## Download:
    If($Download){
      Try{
        Start-BitsTransfer -Source $WebUrl -Destination $SavePath -ErrorAction Stop
      }
      Catch{
        # Record the failure and carry on with the next TR.
        Wr "FAILED D/L: $WebUrl" RED
        "Failed D/L: $WebUrl" | Out-File $LogFilePath -Append
      }
    }
  }else{
    Wr "DEAD LINK : $WebUrl"
  }
}
PAUSE
Comments
Post a Comment