Files
zf-sql/backupmult.ps1
2025-10-24 14:49:14 +01:00

556 lines
22 KiB
PowerShell

# Script parameters.
#   -SqlInstance : SQL Server instance to back up. The part after the backslash is used as the
#                  instance name, and the whole value is used as the connection-string server.
#   -MvName      : exact name of the Rubrik Managed Volume to look up.
#   -Jobs        : number of concurrent backup jobs to start (the script accepts 1 through 8).
#   -DryRun      : when set, the Managed Volume update is built and printed but not invoked.
#                  The script already tests $dryrun; declaring it here makes it settable.
param(
    [Parameter(Mandatory=$true)]
    [string]$SqlInstance,
    [Parameter(Mandatory=$true)]
    [string]$MvName,
    [Parameter(Mandatory=$false)]
    [int]$Jobs = 2,
    [Parameter(Mandatory=$false)]
    [switch]$DryRun
)
#
# backupmult.ps1 - Parallel database backup script using Ola H
#
# Uses Ola H's built-in parallel processing by starting multiple concurrent backup jobs
# Each job will automatically share the database load using DatabasesInParallel=Y
# TODO: Log file management (don't just overwrite existing logs)
# TODO: Dynamically figure out MV channels and paths using Rubrik API
# TODO: See if there is a way to query QueueDatabase during backup to monitor progress
# Day of the week on which a scheduled full backup is taken.
$fullBackupDay = 'Thursday'
# A full backup is forced when the last recorded one is more than this many days old.
$fullBackupOverdueDays = 7
# The instance name is the part after the backslash in HOST\INSTANCE.
# A value without a backslash has no second part; fall back to the default
# instance's service name so the derived names below are never built from $null.
# NOTE(review): confirm the cluster resource naming for a default instance.
$instanceParts = $SqlInstance.Split('\')
$instanceName = if ($instanceParts.Count -gt 1 -and $instanceParts[1]) { $instanceParts[1] } else { 'MSSQLSERVER' }
# Main log file; per-job log files are derived from this path.
$logFile = "C:\Rubrik\backup-multi-$instanceName.log"
# Service account file passed to Connect-Rsc.
$SAFile = "C:\Rubrik\scripts\rbksql.xml"
# Writes a timestamped entry to the main log file ($logFile) and to the console.
#   $message : text to log.
#   $jobId   : optional job identifier; when given, the entry is prefixed with "[JOB-<id>] ".
# File writes are serialized through the named mutex "BackupLogMutex". If the mutex
# cannot be acquired within 5 seconds the entry is written to the console only.
function Write-Log($message, $jobId = "") {
    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
    $jobPrefix = if ($jobId) { "[JOB-$jobId] " } else { "" }
    $logEntry = "$timestamp $jobPrefix$message"
    $mutex = $null
    # Tracks ownership so the mutex is released only when it was actually acquired;
    # releasing a mutex that is not owned throws.
    $ownsMutex = $false
    try {
        $mutex = [System.Threading.Mutex]::new($false, "BackupLogMutex")
        try {
            $ownsMutex = $mutex.WaitOne(5000) # 5 second timeout
        } catch [System.Threading.AbandonedMutexException] {
            # A previous owner exited without releasing; ownership has passed to this caller.
            $ownsMutex = $true
        }
        if ($ownsMutex) {
            Add-Content -Path $logFile -Value $logEntry -Encoding UTF8
        } else {
            Write-Warning "Could not acquire log mutex, writing to console only"
        }
    } catch {
        Write-Warning "Logging error: $($_.Exception.Message)"
    } finally {
        if ($mutex) {
            if ($ownsMutex) {
                $mutex.ReleaseMutex()
            }
            $mutex.Dispose()
        }
    }
    Write-Host $logEntry
}
# Load a SQL Server PowerShell module, preferring SqlServer and falling back to SQLPS.
# The script stops with exit code 1 when neither module is installed, the import fails,
# or Invoke-Sqlcmd is not available afterwards.
try {
    $sqlModuleName = $null
    if (Get-Module -ListAvailable -Name SqlServer) {
        $sqlModuleName = 'SqlServer'
    }
    elseif (Get-Module -ListAvailable -Name SQLPS) {
        $sqlModuleName = 'SQLPS'
    }

    if (-not $sqlModuleName) {
        throw "No SQL Server PowerShell module found"
    }

    if ($sqlModuleName -eq 'SqlServer') {
        Import-Module SqlServer -ErrorAction Stop
        Write-Log "INFO: SqlServer PowerShell module loaded successfully."
    }
    else {
        Import-Module SQLPS -ErrorAction Stop
        Write-Log "INFO: SQLPS PowerShell module loaded successfully."
    }

    $sqlCmdAvailable = [bool](Get-Command Invoke-Sqlcmd -ErrorAction SilentlyContinue)
    if (-not $sqlCmdAvailable) {
        throw "Invoke-Sqlcmd command not available"
    }
}
catch {
    Write-Host "ERROR: Failed to import SQL Server PowerShell module. Please install it using: Install-Module -Name SqlServer -AllowClobber"
    Write-Host "ERROR: $($_.Exception.Message)"
    exit 1
}
# Load the RubrikSecurityCloud module; log the failure and stop with exit code 1 if it cannot be imported.
try {
    Import-Module -Name RubrikSecurityCloud -ErrorAction Stop
    Write-Log "INFO: RubrikSecurityCloud module loaded successfully."
}
catch {
    Write-Log "ERROR: Failed to import RubrikSecurityCloud module. $($_.Exception.Message)"
    exit 1
}
# Cluster ownership check.
# Finds the "SQL Server (<instance>)" cluster resource and compares its owner node with
# this computer. When the instance is owned by another node, the Managed Volume is
# re-pointed at that node's host in Rubrik and the script exits with code 1 so the
# backup does not continue here. When no matching resource exists the script also exits 1.
$localNode = $env:COMPUTERNAME
$clusterInstance = Get-ClusterResource | Where-Object { $_.ResourceType -eq "SQL Server" -and $_.Name -eq "SQL Server ($instanceName)" }
if ($clusterInstance) {
    $ownerNode = $clusterInstance.OwnerNode
    if ($ownerNode -ne $localNode) {
        Write-Log "SQL instance '$SqlInstance' is not running on local node '$localNode'. Updating the MV."
        $rscConnected = $false
        try {
            Connect-Rsc -ServiceAccountFile $SAFile
            $rscConnected = $true
            Write-Log "Connected to Rubrik Security Cloud."

            # Host record of the node that currently owns the instance.
            $newHost = Get-RscHost -Name $ownerNode -OsType WINDOWS
            if (-not $newHost) {
                throw "Host '$ownerNode' was not found in Rubrik Security Cloud."
            }

            # Look up the Managed Volume by exact name, including the fields copied into the update.
            $query = New-RscQuery -GqlQuery slaManagedVolumes -AddField Nodes.HostDetail, Nodes.SmbShare, Nodes.ClientConfig, Nodes.ClientConfig.BackupScript, Nodes.ClientConfig.PreBackupScript
            $query.var.filter = @(Get-RscType -Name Filter)
            $query.var.filter[0].field = "NAME_EXACT_MATCH"
            $query.var.filter[0].Texts = $mvName
            $mvDetail = $query.Invoke().nodes[0]
            if (-not $mvDetail) {
                # Without this guard the mutation below would be sent with a null Id.
                throw "Managed Volume '$mvName' was not found."
            }
            Write-Log "Found Managed Volume: $($mvDetail.Name) (ID: $($mvDetail.Id), Status: $($mvDetail.hostDetail.Status), HostDetail Name: $($mvDetail.hostDetail.Name))"

            # Build the update: existing settings are carried over, only the host changes.
            $query = New-RscMutation -GqlMutation updateManagedVolume
            $query.Var.input = Get-RscType -Name UpdateManagedVolumeInput
            $query.Var.input.update = Get-RscType -Name ManagedVolumeUpdateInput
            $query.Var.input.update.config = Get-RscType -Name ManagedVolumePatchConfigInput
            $query.Var.input.update.slaClientConfig = Get-RscType -Name ManagedVolumePatchSlaClientConfigInput
            $query.Var.input.Id = $mvDetail.Id
            $query.Var.input.update.Name = $mvName
            $query.Var.input.update.config.SmbDomainName = $mvDetail.SmbShare.DomainName
            $query.Var.input.update.config.SmbValidIps = $newHost.Name
            $query.Var.input.update.config.SmbValidUsers = $mvDetail.SmbShare.ValidUsers + $mvDetail.SmbShare.ActiveDirectoryGroups
            $query.Var.input.update.slaClientConfig.clientHostId = $newHost.Id
            $query.Var.input.update.slaClientConfig.channelHostMountPaths = $mvDetail.ClientConfig.ChannelHostMountPaths
            $query.Var.input.update.slaClientConfig.backupScriptCommand = $mvDetail.ClientConfig.BackupScript.ScriptCommand
            $query.Var.input.update.slaClientConfig.preBackupScriptCommand = $mvDetail.ClientConfig.PreBackupScript.ScriptCommand
            $query.Var.input.update.slaClientConfig.preBackupScriptTimeout = $mvDetail.ClientConfig.PreBackupScript.Timeout
            $query.Var.input.update.slaClientConfig.shouldDisablePostBackupScriptOnBackupFailure = $true
            $query.Var.input.update.slaClientConfig.shouldDisablePostBackupScriptOnBackupSuccess = $true
            $query.Var.input.update.slaClientConfig.shouldDisablePreBackupScript = $false
            $query.Var.input.update.slaClientConfig.shouldCancelBackupOnPreBackupScriptFailure = $mvDetail.ClientConfig.ShouldCancelBackupOnPreBackupScriptFailure

            # Emits the request variables to the output stream (appears to be diagnostic output).
            $query.gqlRequest().Variables

            # $dryrun is not assigned in this block; when unset it is $null and the update is invoked.
            if (-not $dryrun) {
                $null = $query.Invoke()
                Write-Log "Managed Volume update invoked for host '$($newHost.Name)'."
            } else {
                Write-Log "Dry run mode: Managed Volume update not invoked."
            }
        } catch {
            Write-Log "ERROR: Failed to update Managed Volume '$mvName'. $($_.Exception.Message)"
        } finally {
            if ($rscConnected) {
                Disconnect-Rsc
            }
        }
        # Now must exit 1 to stop the backup continuing on the wrong node
        exit 1
    } else {
        Write-Log "SQL instance '$SqlInstance' is running on local node '$localNode'. No action needed."
    }
} else {
    Write-Log "ERROR: SQL instance '$SqlInstance' not found in cluster resources."
    exit 1
}
# Connect to Rubrik and retrieve the Managed Volume's channel host mount paths.
# The paths are joined into $directoryParam, a comma-separated list used as the backup directory.
# The script exits with code 1 when the connection or lookup fails, when the
# Managed Volume is not found, or when it has no mount paths.
try {
    Connect-Rsc -ServiceAccountFile $SAFile
    Write-Log "INFO: Connected to Rubrik Security Cloud."
    $query = New-RscQuery -GqlQuery slaManagedVolumes -AddField Nodes.HostDetail, Nodes.SmbShare, Nodes.ClientConfig, Nodes.ClientConfig.BackupScript, Nodes.ClientConfig.PreBackupScript
    $query.var.filter = @(Get-RscType -Name Filter)
    $query.var.filter[0].field = "NAME_EXACT_MATCH"
    $query.var.filter[0].Texts = $MvName
    $mvDetail = $query.Invoke().nodes[0]
    if (-not $mvDetail) {
        throw "Managed Volume '$MvName' was not found."
    }
    # Drop null/empty entries so an empty result is detected instead of producing an empty directory list.
    $paths = @($mvDetail.ClientConfig.ChannelHostMountPaths | Where-Object { $_ })
    if ($paths.Count -eq 0) {
        throw "Managed Volume '$MvName' has no channel host mount paths."
    }
    Write-Log "INFO: Retrieved paths: $($paths -join ', ')"
} catch {
    Write-Log "ERROR: Failed to retrieve paths from Rubrik. $($_.Exception.Message)"
    exit 1
}
$directoryParam = $paths -join ', '
# Reject a job count outside the supported range of 1 to 8 and stop with exit code 1.
$jobCountInRange = ($Jobs -ge 1) -and ($Jobs -le 8)
if (-not $jobCountInRange) {
    Write-Host "ERROR: Jobs parameter must be between 1 and 8. Provided: $Jobs"
    exit 1
}
Write-Host "INFO: Starting $Jobs parallel backup jobs"
# Current local date with the time component removed; used for flag-file comparisons.
$today = [DateTime]::Today
# Returns the trimmed first line of a flag file, or $null when the file is missing or empty.
function Get-BackupFlagValue($flagPath) {
    if (-not (Test-Path $flagPath)) {
        return $null
    }
    $firstLine = Get-Content $flagPath -TotalCount 1 -ErrorAction SilentlyContinue
    if ($null -eq $firstLine) {
        return $null
    }
    return ([string]$firstLine).Trim()
}

# Writes $value to a flag file, creating the parent directory first when it does not exist.
function Set-BackupFlagValue($flagPath, $value) {
    $flagDir = Split-Path $flagPath -Parent
    if (-not (Test-Path $flagDir)) {
        New-Item -ItemType Directory -Path $flagDir -Force | Out-Null
    }
    Set-Content $flagPath $value -Encoding UTF8
}

# Decides which backup type to run today and records the decision in a flag file.
#   $directoryParam : comma-separated directory list; flags live in the first directory.
# Returns a hashtable with keys Type (FULL, DIFF or LOG), CleanupTime and Reason.
# Reads $today, $fullBackupDay and $fullBackupOverdueDays from the calling scope.
#   - FULL on the configured full-backup day, or when the last full is missing,
#     unparseable, or older than $fullBackupOverdueDays days.
#   - DIFF on other days.
#   - LOG when the FULL or DIFF for today is already flagged.
# NOTE(review): the flag is written here, at selection time, so a backup that later
# fails still leaves today's flag set - confirm this is acceptable.
function Get-BackupType($directoryParam) {
    # Use first directory to check flags (assuming shared flag logic across all directories)
    $firstDir = ($directoryParam -split ',')[0].Trim()
    $fullFlag = Join-Path $firstDir "last_full.flag"
    $diffFlag = Join-Path $firstDir "last_diff.flag"
    $todayText = $today.ToString("yyyy-MM-dd")

    # Check if full backup is overdue
    $isFullBackupOverdue = $false
    $lastFullText = Get-BackupFlagValue $fullFlag
    if (Test-Path $fullFlag) {
        try {
            # An empty flag file yields $null here, which fails to parse and is treated as overdue.
            $lastFullDate = [DateTime]::ParseExact($lastFullText, "yyyy-MM-dd", $null)
            $daysSinceLastFull = ($today - $lastFullDate).Days
            $isFullBackupOverdue = $daysSinceLastFull -gt $fullBackupOverdueDays
            Write-Log "INFO: Last full backup was $daysSinceLastFull days ago. Overdue threshold: $fullBackupOverdueDays days."
        }
        catch {
            $isFullBackupOverdue = $true
            Write-Log "WARNING: Could not parse last full backup date. Treating as overdue."
        }
    } else {
        $isFullBackupOverdue = $true
        Write-Log "WARNING: No last full backup date found. Treating as overdue."
    }

    # Determine backup type. The weekday comes from $today so it matches the date written to the flags.
    if ($today.DayOfWeek -eq $fullBackupDay -or $isFullBackupOverdue) {
        if ($lastFullText -ne $todayText) {
            Set-BackupFlagValue $fullFlag $todayText
            $reason = if ($isFullBackupOverdue) { "overdue" } else { "scheduled" }
            return @{ Type = "FULL"; CleanupTime = 168; Reason = $reason }
        }
        return @{ Type = "LOG"; CleanupTime = 24; Reason = "full already taken today" }
    }

    $lastDiffText = Get-BackupFlagValue $diffFlag
    if ($lastDiffText -ne $todayText) {
        Set-BackupFlagValue $diffFlag $todayText
        return @{ Type = "DIFF"; CleanupTime = 168; Reason = "differential scheduled" }
    }
    return @{ Type = "LOG"; CleanupTime = 24; Reason = "diff already taken today" }
}
# Determine backup type
$backupInfo = Get-BackupType $directoryParam
if (-not $backupInfo -or -not $backupInfo.Type) {
    # Without a type the statement below would be sent with an empty @BackupType.
    Write-Log "ERROR: Could not determine backup type."
    exit 1
}
Write-Log "Selected $($backupInfo.Type) backup ($($backupInfo.Reason))"
# The directory list is embedded in a T-SQL string literal, so single quotes must be doubled.
$escapedDirectory = $directoryParam -replace "'", "''"
# Build the DatabaseBackup call with DatabasesInParallel enabled
$query = @"
EXECUTE [dbo].[DatabaseBackup]
@Databases = 'ALL_DATABASES',
@Directory = '$escapedDirectory',
@BackupType = '$($backupInfo.Type)',
@Verify = 'N',
@CleanupTime = $($backupInfo.CleanupTime),
@CheckSum = 'Y',
@LogToTable = 'Y',
@DatabasesInParallel = 'Y'
"@
Write-Log "SQL Query: $query"
# Starts one background job that runs the backup statement against SQL Server.
#   $jobId       : number used in log prefixes and in the per-job log file name.
#   $sqlInstance : server name placed in the connection string.
#   $query       : T-SQL batch to execute (no command timeout is applied).
#   $baseLogFile : main log path; the job writes to "<base>-job<id>.log" derived from it.
# Returns the job object created by Start-Job.
# The job writes "JOB-<id>: SUCCESS" or "JOB-<id>: FAILED" to its output as its final status line.
function Start-BackupJob {
    param(
        [int]$jobId,
        [string]$sqlInstance,
        [string]$query,
        [string]$baseLogFile
    )
    $scriptBlock = {
        param($JobId, $SqlInstance, $Query, $BaseLogFile)
        # Debug the base log file parameter
        Write-Output "DEBUG: BaseLogFile parameter = '$BaseLogFile'"
        # Create job-specific log file path with fallback
        if ($BaseLogFile -and $BaseLogFile.Trim() -ne "") {
            $jobLogFile = $BaseLogFile -replace '\.log$', "-job$JobId.log"
        } else {
            # Fallback log file path
            $jobLogFile = "C:\Rubrik\backup-multi-job$JobId.log"
        }
        Write-Output "DEBUG: Job log file will be: '$jobLogFile'"

        # Appends a timestamped entry to the job log file and, unless suppressed, echoes it to the job output.
        function Write-JobLog($message, $suppressConsole = $false) {
            $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
            $logEntry = "$timestamp [JOB-$JobId] $message"
            if ($jobLogFile -and $jobLogFile.Trim() -ne "") {
                try {
                    Add-Content -Path $jobLogFile -Value $logEntry -Encoding UTF8
                    # Output to console for debugging (unless suppressed)
                    if (-not $suppressConsole) {
                        Write-Output "LOGGED TO $jobLogFile : $logEntry"
                    }
                } catch {
                    Write-Output "LOG ERROR: $($_.Exception.Message) - File: $jobLogFile"
                }
            } else {
                Write-Output "NO LOG FILE: jobLogFile is empty or null"
            }
            # Always output to console for job monitoring (unless suppressed)
            if (-not $suppressConsole) {
                Write-Output $logEntry
            }
        }

        $connection = $null
        $command = $null
        $reader = $null
        try {
            Write-JobLog "Starting backup job"
            # Create SQL connection with message capture
            $connection = New-Object System.Data.SqlClient.SqlConnection
            $connection.ConnectionString = "Server=$SqlInstance;Integrated Security=true;Connection Timeout=30"
            # A list mutated in place: the handler adds to the same object this scope reads,
            # whereas "+=" on a scoped array would create a separate variable.
            $infoMessages = New-Object 'System.Collections.Generic.List[string]'
            # Event handler for informational messages (PRINT statements)
            $connection.add_InfoMessage({
                param($sqlSender, $e)
                $message = $e.Message
                if ($message -and $message.Trim() -ne "") {
                    $infoMessages.Add($message)
                    Write-JobLog "SQL INFO: $message" $true # Suppress console output for verbose messages
                }
            })

            Write-JobLog "Attempting to connect to SQL Server: $SqlInstance"
            $connection.Open()
            Write-JobLog "Connected to SQL Server successfully"
            $command = New-Object System.Data.SqlClient.SqlCommand
            $command.Connection = $connection
            $command.CommandText = $Query
            $command.CommandTimeout = 0 # No timeout for backup operations
            Write-JobLog "Executing backup command..."

            # Execute and walk every result set. Advancing with NextResult() is what surfaces
            # errors raised later in the batch; closing the reader early would discard them.
            $reader = $command.ExecuteReader()
            do {
                while ($reader.Read()) {
                    $rowData = @()
                    for ($i = 0; $i -lt $reader.FieldCount; $i++) {
                        $rowData += "$($reader.GetName($i)): $($reader.GetValue($i))"
                    }
                    if ($rowData.Count -gt 0) {
                        $resultLine = "SQL RESULT: $($rowData -join ', ')"
                        Write-JobLog $resultLine
                        Write-Output $resultLine # Also output for Receive-Job
                    }
                }
            } while ($reader.NextResult())
            $reader.Close()

            $summaryMessage = "Backup completed successfully. Captured $($infoMessages.Count) messages."
            Write-JobLog $summaryMessage
            Write-Output $summaryMessage # Also output for Receive-Job
            # Output all captured SQL messages for debugging (only to log file, not console)
            Write-JobLog "=== SQL MESSAGES START ===" $true
            foreach ($msg in $infoMessages) {
                Write-JobLog "SQL: $msg" $true
            }
            Write-JobLog "=== SQL MESSAGES END ===" $true
            # Don't return hashtable - just output success message
            Write-Output "JOB-${JobId}: SUCCESS"
        }
        catch {
            $errorMessage = "ERROR: Backup failed - $($_.Exception.Message)"
            Write-JobLog $errorMessage
            Write-Output $errorMessage # Also output for Receive-Job
            # Check for specific connection errors
            if ($_.Exception.Message -like "*server*not found*" -or
                $_.Exception.Message -like "*network-related*" -or
                $_.Exception.Message -like "*instance*" -or
                $_.Exception.Message -like "*login*failed*") {
                $connError = "ERROR: CONNECTION FAILURE - Check SQL Server instance name and connectivity"
                Write-JobLog $connError
                Write-Output $connError
            }
            # Exceptions thrown by .NET method calls arrive wrapped, so walk the
            # InnerException chain to find the SqlException, if there is one.
            $sqlException = $_.Exception
            while ($null -ne $sqlException -and $sqlException -isnot [System.Data.SqlClient.SqlException]) {
                $sqlException = $sqlException.InnerException
            }
            # Log SQL Server specific errors
            if ($null -ne $sqlException) {
                Write-JobLog "ERROR: SQL Server Error Details:"
                Write-Output "ERROR: SQL Server Error Details:"
                foreach ($sqlError in $sqlException.Errors) {
                    $errorDetail = "ERROR: Severity: $($sqlError.Class), State: $($sqlError.State), Number: $($sqlError.Number)"
                    Write-JobLog $errorDetail
                    Write-Output $errorDetail
                    $errorMsg = "ERROR: Message: $($sqlError.Message)"
                    Write-JobLog $errorMsg
                    Write-Output $errorMsg
                    if ($sqlError.Procedure) {
                        $procError = "ERROR: Procedure: $($sqlError.Procedure), Line: $($sqlError.LineNumber)"
                        Write-JobLog $procError
                        Write-Output $procError
                    }
                }
            }
            # Log full exception details for debugging
            $fullError = "ERROR: Full Exception Type: $($_.Exception.GetType().Name)"
            Write-JobLog $fullError
            Write-Output $fullError
            if ($_.Exception.InnerException) {
                $innerError = "ERROR: Inner Exception: $($_.Exception.InnerException.Message)"
                Write-JobLog $innerError
                Write-Output $innerError
            }
            Write-Output "JOB-${JobId}: FAILED"
        }
        finally {
            # Release the reader, command and connection on both the success and failure paths.
            if ($null -ne $reader) {
                $reader.Dispose()
            }
            if ($null -ne $command) {
                $command.Dispose()
            }
            if ($null -ne $connection) {
                if ($connection.State -eq [System.Data.ConnectionState]::Open) {
                    $connection.Close()
                }
                $connection.Dispose()
            }
        }
    }
    return Start-Job -ScriptBlock $scriptBlock -ArgumentList $jobId, $sqlInstance, $query, $baseLogFile
}
# Launch $Jobs background jobs, all running the same statement, and keep the job objects in $jobList.
Write-Log "Starting $Jobs parallel backup jobs using DatabasesInParallel feature"
[System.Collections.ArrayList]$jobList = @()
$i = 1
while ($i -le $Jobs) {
    $job = Start-BackupJob -jobId $i -sqlInstance $SqlInstance -query $query -baseLogFile $logFile
    [void]$jobList.Add($job)
    Write-Log "Started backup job $i (Job ID: $($job.Id))"
    # Brief pause so the jobs do not all start at the same instant
    Start-Sleep -Milliseconds 100
    $i++
}
# Monitor jobs and capture output.
# Polls every 5 seconds until every job has reached Completed, Failed or Stopped, and
# copies each finished job's output into the main log.
# A job in state Completed is reported as successful only when its output does not
# contain the "JOB-<n>: FAILED" marker; the job catches its own errors, so the state
# alone does not show whether the backup worked.
Write-Log "Monitoring $($jobList.Count) backup jobs..."
$allJobsCompleted = $false
[System.Collections.ArrayList]$completedJobs = @()
$finishedStates = @("Completed", "Failed", "Stopped")
while (-not $allJobsCompleted) {
    Start-Sleep -Seconds 5
    foreach ($job in $jobList) {
        if ($job.Id -in $completedJobs) {
            continue
        }
        # Check if job is no longer running
        $jobState = [string]$job.State
        if ($jobState -notin $finishedStates) {
            continue
        }
        $null = $completedJobs.Add($job.Id)
        # Get all job output
        $jobOutput = Receive-Job -Job $job -Keep # Use -Keep to preserve output

        # Look for the failure marker the job prints when its backup failed.
        $reportedFailure = $false
        foreach ($line in $jobOutput) {
            if ($line -like "*JOB-*: FAILED") {
                $reportedFailure = $true
                break
            }
        }

        if ($jobState -eq "Completed") {
            if ($reportedFailure) {
                Write-Log "ERROR: Job $($job.Id) finished but reported a backup failure"
            } else {
                Write-Log "Job $($job.Id) completed successfully"
            }
            # Log all job output to main log
            if ($jobOutput) {
                Write-Log "=== Job $($job.Id) Output ==="
                foreach ($line in $jobOutput) {
                    Write-Log "$line"
                }
                Write-Log "=== End Job $($job.Id) Output ==="
            }
        } else {
            Write-Log "ERROR: Job $($job.Id) failed with state: $($job.State)"
            if ($jobOutput) {
                Write-Log "=== Job $($job.Id) Error Output ==="
                foreach ($line in $jobOutput) {
                    Write-Log "ERROR: $line"
                }
                Write-Log "=== End Job $($job.Id) Error Output ==="
            }
        }
    }
    $allJobsCompleted = $completedJobs.Count -eq $jobList.Count
    # Progress update
    $runningCount = @($jobList | Where-Object { $_.State -eq "Running" }).Count
    if ($runningCount -gt 0) {
        Write-Log "Progress: $($completedJobs.Count)/$($jobList.Count) jobs completed, $runningCount still running..."
    }
}
Write-Log "All backup jobs completed"
# Snapshot each job's state and whether its output contains the "JOB-<n>: FAILED" marker.
# This runs before the jobs are removed so the final status check can still use the data.
$jobResults = @{}
foreach ($job in $jobList) {
    $jobOutput = Receive-Job -Job $job -Keep -ErrorAction SilentlyContinue
    # Collect any output lines carrying the failure marker
    $failureLines = @($jobOutput | Where-Object { $_ -like "*JOB-*: FAILED" })
    $hasFailed = $failureLines.Count -gt 0
    $jobResults[$job.Id] = @{
        State = $job.State
        Failed = $hasFailed
    }
}
# Remove every job object; a job that is still running is stopped first.
# Cleanup problems are logged as warnings and do not stop the script.
Write-Log "Cleaning up completed jobs..."
foreach ($job in $jobList) {
    try {
        $needsStop = ($job.State -eq "Running")
        if ($needsStop) {
            Write-Log "WARNING: Job $($job.Id) still running, stopping it..."
            Stop-Job -Job $job -Force
            # Give the job a moment to stop before removing it
            Start-Sleep -Seconds 2
        }
        Remove-Job -Job $job -Force -ErrorAction SilentlyContinue
        Write-Log "Cleaned up job $($job.Id)"
    }
    catch {
        Write-Log "WARNING: Could not clean up job $($job.Id): $($_.Exception.Message)"
    }
}
# Consolidate job logs into main log file.
# For each job number, the per-job log ("<main>-job<n>.log") is appended to the main log
# and then deleted. The job log is deleted only after the append succeeded, so a failed
# append does not lose the job's log.
Write-Log "Consolidating job logs..."
for ($i = 1; $i -le $Jobs; $i++) {
    $jobLogFile = $logFile -replace '\.log$', "-job$i.log"
    Write-Log "Checking for job log file: $jobLogFile"
    if (-not (Test-Path $jobLogFile)) {
        Write-Log "WARNING: Job log file not found for job $i"
        continue
    }
    try {
        $jobContent = @(Get-Content $jobLogFile -ErrorAction Stop)
        Write-Log "Found $($jobContent.Count) lines in job $i log"
        if ($jobContent.Count -gt 0) {
            # Append all lines in one call instead of reopening the main log per line;
            # -ErrorAction Stop sends a failed append to the catch block below.
            Add-Content -Path $logFile -Value $jobContent -Encoding UTF8 -ErrorAction Stop
        }
        Remove-Item $jobLogFile -Force -ErrorAction Stop
        Write-Log "Consolidated log from job $i"
    } catch {
        Write-Log "WARNING: Could not consolidate log from job $i : $($_.Exception.Message)"
    }
}
# Final status check.
# A job counts as failed when its output carried the failure marker (Failed = $true)
# or when it did not end in state Completed, so a job that ended Failed or Stopped
# without printing the marker is not reported as a success.
# Exits with code 1 when any job failed.
$failedJobIds = @($jobResults.Keys | Where-Object {
    $jobResults[$_].Failed -eq $true -or [string]$jobResults[$_].State -ne "Completed"
})
if ($failedJobIds.Count -gt 0) {
    Write-Log "ERROR: $($failedJobIds.Count) out of $($jobResults.Count) backup jobs failed"
    foreach ($jobId in $failedJobIds) {
        Write-Log "ERROR: Job ID $jobId failed"
    }
    Write-Log "CRITICAL: Backup operation failed - check errors above"
    exit 1
} else {
    Write-Log "SUCCESS: All $($jobResults.Count) backup jobs completed successfully"
}