param(
[Parameter(Mandatory=$true)]
[string]$SqlInstance,
[Parameter(Mandatory=$true)]
[string]$MvName,
[Parameter(Mandatory=$false)]
[int]$Jobs = 1,
[Parameter(Mandatory=$false)]
[int]$LogRetentionDays = 30,
[Parameter(Mandatory=$false)]
[switch]$Nuke,
[Parameter(Mandatory=$false)]
[switch]$DryRun # Log the Managed Volume update instead of invoking it
)
# backup.ps1 - Parallel database backup script using Ola Hallengren's DatabaseBackup procedure
#
# Uses Ola Hallengren's built-in parallel processing by starting multiple concurrent backup jobs.
# When Jobs > 1 the jobs share the database workload via @DatabasesInParallel = 'Y'.
# TODO: See if there is a way to query the QueueDatabase table during backup to monitor progress
# TODO: Better error trapping when the RSC connection fails
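#
# Example usage (parameter values are illustrative):
#   .\backup.ps1 -SqlInstance 'SQLCLUS01\PROD' -MvName 'mv-sql-prod' -Jobs 2
#   .\backup.ps1 -SqlInstance MSSQLSERVER -MvName 'mv-sql-prod' -Nuke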
$fullBackupDay = 'Thursday'
$fullBackupOverdueDays = 7
$SAFile = "C:\Rubrik\scripts\rbksql.xml"
$logDir = "C:\Rubrik\logs"
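# Write-Log: timestamped (and optionally job-prefixed) append to the main log file,
# guarded by a named mutex so concurrent writers do not interleave. Falls back to
# console-only output if the mutex cannot be acquired within 5 seconds.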
function Write-Log($message, $jobId = "") {
$timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
$jobPrefix = if ($jobId) { "[JOB-$jobId] " } else { "" }
$logEntry = "$timestamp $jobPrefix$message"
# Use mutex for thread-safe logging to main log file
$mutex = $null
$acquired = $false
try {
$mutex = [System.Threading.Mutex]::new($false, "BackupLogMutex")
$acquired = $mutex.WaitOne(5000) # 5 second timeout
if ($acquired) {
Add-Content -Path $logFile -Value $logEntry -Encoding UTF8
} else {
Write-Warning "Could not acquire log mutex, writing to console only"
}
} catch {
Write-Warning "Logging error: $($_.Exception.Message)"
} finally {
if ($mutex) {
# Only release the mutex if this call actually acquired it; releasing an unowned mutex throws
if ($acquired) { $mutex.ReleaseMutex() }
$mutex.Dispose()
}
}
Write-Host $logEntry
}
# Parse instance name from SQL instance parameter
$instanceParts = $SqlInstance -split '\\'
if ($instanceParts.Length -eq 2) {
# Format: HOSTNAME\INSTANCENAME or CLUSTERNAME\INSTANCENAME
$instanceName = $instanceParts[1]
# SqlInstance stays as provided (HOSTNAME\INSTANCENAME)
} elseif ($instanceParts.Length -eq 1) {
# Single value provided - could be hostname (default instance) or instance name on local host
$singleValue = $instanceParts[0]
# If it's "MSSQLSERVER" (default instance name), treat as default instance on local host
if ($singleValue -eq "MSSQLSERVER") {
$instanceName = "MSSQLSERVER"
$SqlInstance = $env:COMPUTERNAME # Connect to default instance (no instance name)
} else {
# Assume it's an instance name on the local host
$instanceName = $singleValue
$SqlInstance = "$($env:COMPUTERNAME)\$singleValue"
}
} else {
# Unexpected format (more than one separator) - flatten separators into a safe name
$instanceName = $SqlInstance.Replace('\', '_').Replace('/', '_')
}
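# Examples (illustrative values):
#   'SQLCLUS01\PROD' -> instanceName 'PROD', connect to 'SQLCLUS01\PROD'
#   'MSSQLSERVER'    -> default instance on the local computer
#   'INST2'          -> instanceName 'INST2', connect to '<COMPUTERNAME>\INST2'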
if ([string]::IsNullOrEmpty($instanceName)) {
Write-Host "ERROR: Could not determine instance name from SqlInstance: '$SqlInstance'"
exit 1
}
# Sanitize and trim the instance name for safe filenames
$instanceName = $instanceName.Trim()
$invalidChars = [IO.Path]::GetInvalidFileNameChars()
foreach ($c in $invalidChars) {
$escaped = [regex]::Escape($c)
$instanceName = $instanceName -replace $escaped, '_'
}
$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
# Ensure log directory exists before building/using log file
if (-not (Test-Path $logDir)) {
try {
New-Item -ItemType Directory -Path $logDir -Force | Out-Null
} catch {
Write-Host "ERROR: Could not create log directory $logDir : $($_.Exception.Message)"
exit 1
}
}
$logFileName = "backup_{0}_{1}.log" -f $instanceName, $timestamp
$logFile = Join-Path $logDir $logFileName
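# e.g. C:\Rubrik\logs\backup_PROD_20240620_011500.log (illustrative name)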
Write-Log "DEBUG: SqlInstance='$SqlInstance', instanceName='$instanceName', logFile='$logFile'"
# Function to clean up old log files
function Remove-OldLogs {
param([int]$retentionDays)
if (-not (Test-Path $logDir)) {
try {
New-Item -ItemType Directory -Path $logDir -Force | Out-Null
Write-Log "INFO: Created log directory: $logDir"
} catch {
Write-Log "ERROR: Failed to create log directory $logDir. $($_.Exception.Message)"
return
}
}
$cutoffDate = (Get-Date).AddDays(-$retentionDays)
Write-Log "INFO: Cleaning up log files older than $retentionDays days (before $($cutoffDate.ToString('yyyy-MM-dd')))"
$oldLogs = Get-ChildItem -Path $logDir -Filter "*.log" | Where-Object { $_.LastWriteTime -lt $cutoffDate }
$deletedCount = 0
# Use a distinct loop variable so the script-scoped $logFile used by Write-Log is not shadowed
foreach ($oldLog in $oldLogs) {
try {
Remove-Item $oldLog.FullName -Force
$deletedCount++
} catch {
Write-Log "WARNING: Failed to delete old log file $($oldLog.Name): $($_.Exception.Message)"
}
}
Write-Log "INFO: Cleaned up $deletedCount old log files"
}
# Clean up old logs before starting
Remove-OldLogs -retentionDays $LogRetentionDays
# Import SQL Server PowerShell module
try {
if (Get-Module -ListAvailable -Name SqlServer) {
Import-Module SqlServer -ErrorAction Stop
Write-Log "INFO: SqlServer PowerShell module loaded successfully."
}
elseif (Get-Module -ListAvailable -Name SQLPS) {
Import-Module SQLPS -ErrorAction Stop
Write-Log "INFO: SQLPS PowerShell module loaded successfully."
}
else {
throw "No SQL Server PowerShell module found"
}
if (-not (Get-Command Invoke-Sqlcmd -ErrorAction SilentlyContinue)) {
throw "Invoke-Sqlcmd command not available"
}
}
catch {
Write-Log "ERROR: Failed to import SQL Server PowerShell module. Please install it using: Install-Module -Name SqlServer -AllowClobber"
Write-Log "ERROR: $($_.Exception.Message)"
exit 1
}
# Import Rubrik Security Cloud module
try {
Import-Module RubrikSecurityCloud -ErrorAction Stop
Write-Log "INFO: RubrikSecurityCloud module loaded successfully."
} catch {
Write-Log "ERROR: Failed to import RubrikSecurityCloud module. $($_.Exception.Message)"
exit 1
}
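# Failover cluster awareness: if this SQL instance is a clustered resource currently owned by
# another node, repoint the Managed Volume's SLA client config (valid SMB hosts and client host)
# at the owner node via RSC, then exit non-zero so the backup does not run on the wrong node.
# Standalone instances fall through to the normal backup flow.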
$localNode = $env:COMPUTERNAME
$clusterInstance = $null
if (Get-Command Get-ClusterResource -ErrorAction SilentlyContinue) {
$clusterInstance = Get-ClusterResource -ErrorAction SilentlyContinue | Where-Object { $_.ResourceType -eq "SQL Server" -and $_.Name -eq "SQL Server ($instanceName)" }
}
if ($clusterInstance) {
$ownerNode = $clusterInstance.OwnerNode
if ($ownerNode -ne $localNode) {
Write-Log "SQL instance '$SqlInstance' is not running on local node '$localNode'. Updating the MV."
Connect-Rsc -ServiceAccountFile $SAFile
Write-Log "Connected to Rubrik Security Cloud."
$newHost = Get-RscHost -Name $ownerNode -OsType WINDOWS
$query = New-RscQuery -GqlQuery slaManagedVolumes -AddField Nodes.HostDetail, Nodes.SmbShare, Nodes.ClientConfig, Nodes.ClientConfig.BackupScript, Nodes.ClientConfig.PreBackupScript
$query.var.filter = @(Get-RscType -Name Filter)
$query.var.filter[0].field = "NAME_EXACT_MATCH"
$query.var.filter[0].Texts = $mvName
$mvResult = $query.Invoke()
if (-not $mvResult.nodes -or $mvResult.nodes.Count -eq 0) {
Write-Log "ERROR: Managed Volume '$mvName' not found. This may be due to insufficient permissions or the volume not existing."
Disconnect-Rsc
exit 1
}
$mvDetail = $mvResult.nodes[0]
Write-Log "Found Managed Volume: $($mvDetail.Name) (ID: $($mvDetail.Id), Status: $($mvDetail.hostDetail.Status), HostDetail Name: $($mvDetail.hostDetail.Name))"
$query = New-RscMutation -GqlMutation updateManagedVolume
$query.Var.input = Get-RscType -Name UpdateManagedVolumeInput
$query.Var.input.update = Get-RscType -Name ManagedVolumeUpdateInput
$query.Var.input.update.config = Get-RscType -Name ManagedVolumePatchConfigInput
$query.Var.input.update.slaClientConfig = Get-RscType -Name ManagedVolumePatchSlaClientConfigInput
$query.Var.input.Id = $mvDetail.Id
$query.Var.input.update.Name = $mvName
$query.Var.input.update.config.SmbDomainName = $mvDetail.SmbShare.DomainName
$query.Var.input.update.config.SmbValidIps = $newHost.Name
$query.Var.input.update.config.SmbValidUsers = $mvDetail.SmbShare.ValidUsers + $mvDetail.SmbShare.ActiveDirectoryGroups
$query.Var.input.update.slaClientConfig.clientHostId = $newHost.Id
$query.Var.input.update.slaClientConfig.channelHostMountPaths = $mvDetail.ClientConfig.ChannelHostMountPaths
$query.Var.input.update.slaClientConfig.backupScriptCommand = $mvDetail.ClientConfig.BackupScript.ScriptCommand
# Only set pre-backup script fields if a pre-backup script was configured
if ($mvDetail.ClientConfig.PreBackupScript.ScriptCommand) {
$query.Var.input.update.slaClientConfig.preBackupScriptCommand = $mvDetail.ClientConfig.PreBackupScript.ScriptCommand
$query.Var.input.update.slaClientConfig.preBackupScriptTimeout = $mvDetail.ClientConfig.PreBackupScript.Timeout
$query.Var.input.update.slaClientConfig.shouldCancelBackupOnPreBackupScriptFailure = $mvDetail.ClientConfig.ShouldCancelBackupOnPreBackupScriptFailure
$query.Var.input.update.slaClientConfig.shouldDisablePreBackupScript = $false
} else {
$query.Var.input.update.slaClientConfig.shouldDisablePreBackupScript = $true
}
$query.Var.input.update.slaClientConfig.shouldDisablePostBackupScriptOnBackupFailure = $true
$query.Var.input.update.slaClientConfig.shouldDisablePostBackupScriptOnBackupSuccess = $true
$query.gqlRequest().Variables
if (-not $DryRun) {
$query.Invoke()
} else {
Write-Log "Dry run mode: Managed Volume update not invoked."
}
# Exit non-zero so the backup does not continue on the wrong node
Disconnect-Rsc
exit 1
} else {
Write-Log "SQL instance '$SqlInstance' is running on local node '$localNode'. No action needed."
}
} else {
Write-Log "ERROR: SQL instance '$SqlInstance' not found in cluster resources. Continuing assuming standalone instance."
}
# Connect to Rubrik and retrieve managed volume paths
try {
Connect-Rsc -ServiceAccountFile $SAFile
Write-Log "INFO: Connected to Rubrik Security Cloud."
$query = New-RscQuery -GqlQuery slaManagedVolumes -AddField Nodes.HostDetail, Nodes.SmbShare, Nodes.ClientConfig, Nodes.ClientConfig.BackupScript, Nodes.ClientConfig.PreBackupScript
$query.var.filter = @(Get-RscType -Name Filter)
$query.var.filter[0].field = "NAME_EXACT_MATCH"
$query.var.filter[0].Texts = $MvName
$mvDetail = $query.Invoke()
if (-not $mvDetail.nodes -or $mvDetail.nodes.Count -eq 0) {
Write-Log "ERROR: Managed Volume '$MvName' not found. This may be due to insufficient permissions or the volume not existing."
exit 1
}
$paths = $mvDetail.nodes[0].ClientConfig.ChannelHostMountPaths
Write-Log "INFO: Retrieved paths: $($paths -join ', ')"
} catch {
Write-Log "ERROR: Failed to retrieve paths from Rubrik. $($_.Exception.Message)"
exit 1
}
# If -Nuke is set, delete the contents of each retrieved path (but keep the folder itself).
if ($Nuke) {
Write-Log "INFO: -nuke flag set. Beginning recursive deletion of contents for retrieved paths."
foreach ($p in $paths) {
if (-not $p) { continue }
$pathToCheck = $p.Trim()
# Determine root to avoid deleting drive root like C:\
try { $root = [IO.Path]::GetPathRoot($pathToCheck) } catch { $root = $null }
if ([string]::IsNullOrEmpty($pathToCheck)) {
Write-Log "WARNING: Skipping empty path entry"
continue
}
if ($root -and ($pathToCheck.TrimEnd('\') -eq $root.TrimEnd('\'))) {
Write-Log "ERROR: Refusing to nuke root path '$pathToCheck'. Skipping."
continue
}
if (-not (Test-Path -LiteralPath $pathToCheck)) {
Write-Log "WARNING: Path '$pathToCheck' does not exist. Skipping."
continue
}
Write-Log "INFO: NUKING contents of '$pathToCheck' (deleting all files & subfolders inside)."
try {
# Enumerate children and delete each item so the folder itself remains
Get-ChildItem -LiteralPath $pathToCheck -Force -ErrorAction SilentlyContinue | ForEach-Object {
try {
Remove-Item -LiteralPath $_.FullName -Recurse -Force -ErrorAction Stop
Write-Log "INFO: Deleted: $($_.FullName)"
} catch {
Write-Log "WARNING: Failed to delete $($_.FullName): $($_.Exception.Message)"
}
}
} catch {
Write-Log "ERROR: Failed to enumerate or delete contents of '$pathToCheck': $($_.Exception.Message)"
}
}
Write-Log "INFO: -nuke operation complete. Continuing with backup flow."
}
$directoryParam = $paths -join ', '
# Validate job count
if ($Jobs -lt 1 -or $Jobs -gt 4) {
Write-Log "ERROR: Jobs parameter must be between 1 and 4. Provided: $Jobs"
exit 1
}
Write-Log "INFO: Starting $Jobs parallel backup jobs"
$today = (Get-Date).Date
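# Get-BackupType: decides FULL vs DIFF vs LOG from flag files kept in the MV mount paths.
# FULL on the configured full-backup day, or whenever the last full is older than
# $fullBackupOverdueDays days (or no flag exists); otherwise DIFF once per day; LOG when a
# full/diff has already been recorded for today. Returns the backup type, the @CleanupTime
# value in hours, and a reason string, and writes/updates last_full.flag / last_diff.flag
# as a side effect.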
function Get-BackupType($directoryParam) {
# Support multiple candidate directories. Scan them in deterministic order for existing flags.
$dirs = @()
if ($directoryParam) {
$dirs = $directoryParam -split ',' | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne '' }
}
# Build lists of found flags (in candidate order)
$foundFull = @()
$foundDiff = @()
foreach ($d in $dirs) {
$full = Join-Path $d "last_full.flag"
$diff = Join-Path $d "last_diff.flag"
if (Test-Path $full) { $foundFull += $full }
if (Test-Path $diff) { $foundDiff += $diff }
}
# Determine if full backup is overdue using the first-found full flag (if any)
$isFullBackupOverdue = $false
if ($foundFull.Count -gt 0) {
$fullFlag = $foundFull[0]
try {
$lastFullDate = [DateTime]::ParseExact((Get-Content $fullFlag).Trim(), "yyyy-MM-dd", $null)
$daysSinceLastFull = ($today - $lastFullDate).Days
$isFullBackupOverdue = $daysSinceLastFull -gt $fullBackupOverdueDays
Write-Log "INFO: Last full backup was $daysSinceLastFull days ago (from $fullFlag). Overdue threshold: $fullBackupOverdueDays days."
} catch {
$isFullBackupOverdue = $true
Write-Log "WARNING: Could not parse last full backup date in $fullFlag. Treating as overdue."
}
} else {
$isFullBackupOverdue = $true
Write-Log "WARNING: No last full backup date found in any candidate directories. Treating as overdue."
}
# Helper to ensure directory exists
function Ensure-DirExists([string]$path) {
if (-not (Test-Path $path)) {
try { New-Item -ItemType Directory -Path $path -Force | Out-Null } catch { }
}
}
# Determine preferred write location: prefer existing related flag location, otherwise first candidate dir
$firstDir = $dirs[0]
# If it's a full backup day or overdue, plan for full backup
if ((Get-Date).DayOfWeek -eq $fullBackupDay -or $isFullBackupOverdue) {
# If a full flag exists, use its location; else use firstDir
$targetFullFlag = if ($foundFull.Count -gt 0) { $foundFull[0] } else { Join-Path $firstDir "last_full.flag" }
$targetDir = Split-Path $targetFullFlag -Parent
Ensure-DirExists $targetDir
$currentValue = $null
if (Test-Path $targetFullFlag) {
try { $currentValue = (Get-Content $targetFullFlag).Trim() } catch { $currentValue = $null }
}
if (-not $currentValue -or $currentValue -ne $today.ToString("yyyy-MM-dd")) {
try {
Set-Content -Path $targetFullFlag -Value $today.ToString("yyyy-MM-dd") -Encoding UTF8
Write-Log "INFO: Created/Updated full backup flag file: $targetFullFlag"
} catch {
Write-Log "ERROR: Failed to create/update full backup flag file: $targetFullFlag. $($_.Exception.Message)"
}
$reason = if ($isFullBackupOverdue) { "overdue" } else { "scheduled" }
return @{ Type = "FULL"; CleanupTime = 168; Reason = $reason }
} else {
return @{ Type = "LOG"; CleanupTime = 24; Reason = "full already taken today" }
}
}
# Otherwise, plan for differential
# Prefer an existing diff flag location if present; else prefer the existing full flag location (write diff alongside full); otherwise firstDir
if ($foundDiff.Count -gt 0) {
$targetDiffFlag = $foundDiff[0]
} elseif ($foundFull.Count -gt 0) {
$targetDiffFlag = Join-Path (Split-Path $foundFull[0] -Parent) "last_diff.flag"
} else {
$targetDiffFlag = Join-Path $firstDir "last_diff.flag"
}
$targetDir = Split-Path $targetDiffFlag -Parent
Ensure-DirExists $targetDir
$currentDiffValue = $null
if (Test-Path $targetDiffFlag) {
try { $currentDiffValue = (Get-Content $targetDiffFlag).Trim() } catch { $currentDiffValue = $null }
}
if (-not $currentDiffValue -or $currentDiffValue -ne $today.ToString("yyyy-MM-dd")) {
try {
Set-Content -Path $targetDiffFlag -Value $today.ToString("yyyy-MM-dd") -Encoding UTF8
Write-Log "INFO: Created/Updated diff backup flag file: $targetDiffFlag"
} catch {
Write-Log "ERROR: Failed to create/update diff backup flag file: $targetDiffFlag. $($_.Exception.Message)"
}
return @{ Type = "DIFF"; CleanupTime = 168; Reason = "differential scheduled" }
} else {
return @{ Type = "LOG"; CleanupTime = 24; Reason = "diff already taken today" }
}
}
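# Example (illustrative): on the full-backup day with no existing flag files, Get-BackupType
# writes today's date to last_full.flag in the first mount path and returns
# @{ Type = "FULL"; CleanupTime = 168; Reason = "overdue" }; a second run on the same day
# returns @{ Type = "LOG"; CleanupTime = 24; Reason = "full already taken today" }.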
# Determine backup type
$backupInfo = Get-BackupType $directoryParam
Write-Log "Selected $($backupInfo.Type) backup ($($backupInfo.Reason))"
# Build the Ola Hallengren DatabaseBackup call. Include @DatabasesInParallel only when multiple jobs are used.
# Building the parameter list as an array avoids leaving a trailing comma when @DatabasesInParallel is omitted.
$paramLines = @(
"@Databases = 'ALL_DATABASES'",
"@Directory = '$directoryParam'",
"@BackupType = '$($backupInfo.Type)'",
"@Verify = 'N'",
"@CleanupTime = $($backupInfo.CleanupTime)",
"@CheckSum = 'Y'",
"@LogToTable = 'Y'"
)
# Only enable DatabasesInParallel when we run more than one job
if ($Jobs -gt 1) {
$paramLines += "@DatabasesInParallel = 'Y'"
}
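# Note: with @DatabasesInParallel = 'Y', Ola Hallengren's solution coordinates the concurrent
# sessions through its Queue/QueueDatabase tables so each session picks up a different database.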
# Join with commas and indentation to produce clean SQL parameter list
$params = $paramLines -join ",`n "
$query = "EXECUTE [dbo].[DatabaseBackup] `n $params"
Write-Log "SQL Query: $query"
# Function to execute backup job with message capture
function Start-BackupJob {
param(
[int]$jobId,
[string]$sqlInstance,
[string]$query,
[string]$baseLogFile
)
$scriptBlock = {
param($JobId, $SqlInstance, $Query, $BaseLogFile)
# Debug the base log file parameter
Write-Output "DEBUG: BaseLogFile parameter = '$BaseLogFile'"
# Create job-specific log file path with fallback
if ($BaseLogFile -and $BaseLogFile.Trim() -ne "") {
$jobLogFile = $BaseLogFile -replace '\.log$', "-job$JobId.log"
} else {
# Fallback log file path using logDir
$jobLogFile = Join-Path $using:logDir "backup-multi-job$JobId.log"
}
Write-Output "DEBUG: Job log file will be: '$jobLogFile'"
function Write-JobLog($message, $suppressConsole = $false) {
$timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
$logEntry = "$timestamp [JOB-$JobId] $message"
if ($jobLogFile -and $jobLogFile.Trim() -ne "") {
try {
Add-Content -Path $jobLogFile -Value $logEntry -Encoding UTF8
# Output to console for debugging (unless suppressed)
if (-not $suppressConsole) {
Write-Output "LOGGED TO $jobLogFile : $logEntry"
}
} catch {
Write-Output "LOG ERROR: $($_.Exception.Message) - File: $jobLogFile"
}
} else {
Write-Output "NO LOG FILE: jobLogFile is empty or null"
}
# Always output to console for job monitoring (unless suppressed)
if (-not $suppressConsole) {
Write-Output $logEntry
}
}
try {
Write-JobLog "Starting backup job"
# Create SQL connection with message capture
$connection = New-Object System.Data.SqlClient.SqlConnection
$connection.ConnectionString = "Server=$SqlInstance;Integrated Security=true;Connection Timeout=30"
$script:infoMessages = @() # Script scope so the InfoMessage handler below appends to the same collection
# Event handler for informational messages (PRINT statements)
$connection.add_InfoMessage({
param($sqlSender, $e)
$message = $e.Message
if ($message -and $message.Trim() -ne "") {
$script:infoMessages += $message
Write-JobLog "SQL INFO: $message" $true # Suppress console output for verbose messages
}
})
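# Note: the InfoMessage event above fires for PRINT output and other low-severity messages
# raised while the command runs, which is how the DatabaseBackup procedure's per-database
# progress output gets captured here.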
try {
Write-JobLog "Attempting to connect to SQL Server: $SqlInstance"
$connection.Open()
Write-JobLog "Connected to SQL Server successfully"
$command = New-Object System.Data.SqlClient.SqlCommand
$command.Connection = $connection
$command.CommandText = $Query
$command.CommandTimeout = 0 # No timeout for backup operations
Write-JobLog "Executing backup command..."
# Execute and capture any result sets
$reader = $command.ExecuteReader()
# Process any result sets
while ($reader.Read()) {
$rowData = @()
for ($i = 0; $i -lt $reader.FieldCount; $i++) {
$rowData += "$($reader.GetName($i)): $($reader.GetValue($i))"
}
if ($rowData.Count -gt 0) {
$resultLine = "SQL RESULT: $($rowData -join ', ')"
Write-JobLog $resultLine
Write-Output $resultLine # Also output for Receive-Job
}
}
$reader.Close()
$summaryMessage = "Backup completed successfully. Captured $($script:infoMessages.Count) messages."
Write-JobLog $summaryMessage
Write-Output $summaryMessage # Also output for Receive-Job
# Output all captured SQL messages for debugging (only to log file, not console)
Write-JobLog "=== SQL MESSAGES START ===" $true
foreach ($msg in $script:infoMessages) {
Write-JobLog "SQL: $msg" $true
}
Write-JobLog "=== SQL MESSAGES END ===" $true
# Don't return hashtable - just output success message
Write-Output "JOB-${JobId}: SUCCESS"
}
finally {
if ($connection.State -eq [System.Data.ConnectionState]::Open) {
$connection.Close()
}
$connection.Dispose()
}
}
catch {
$errorMessage = "ERROR: Backup failed - $($_.Exception.Message)"
Write-JobLog $errorMessage
Write-Output $errorMessage # Also output for Receive-Job
# Check for specific connection errors
if ($_.Exception.Message -like "*server*not found*" -or
$_.Exception.Message -like "*network-related*" -or
$_.Exception.Message -like "*instance*" -or
$_.Exception.Message -like "*login*failed*") {
$connError = "ERROR: CONNECTION FAILURE - Check SQL Server instance name and connectivity"
Write-JobLog $connError
Write-Output $connError
}
# Log SQL Server specific errors
if ($_.Exception -is [System.Data.SqlClient.SqlException]) {
Write-JobLog "ERROR: SQL Server Error Details:"
Write-Output "ERROR: SQL Server Error Details:"
foreach ($sqlError in $_.Exception.Errors) {
$errorDetail = "ERROR: Severity: $($sqlError.Class), State: $($sqlError.State), Number: $($sqlError.Number)"
Write-JobLog $errorDetail
Write-Output $errorDetail
$errorMsg = "ERROR: Message: $($sqlError.Message)"
Write-JobLog $errorMsg
Write-Output $errorMsg
if ($sqlError.Procedure) {
$procError = "ERROR: Procedure: $($sqlError.Procedure), Line: $($sqlError.LineNumber)"
Write-JobLog $procError
Write-Output $procError
}
}
}
# Log full exception details for debugging
$fullError = "ERROR: Full Exception Type: $($_.Exception.GetType().Name)"
Write-JobLog $fullError
Write-Output $fullError
if ($_.Exception.InnerException) {
$innerError = "ERROR: Inner Exception: $($_.Exception.InnerException.Message)"
Write-JobLog $innerError
Write-Output $innerError
}
Write-Output "JOB-${JobId}: FAILED"
}
}
return Start-Job -ScriptBlock $scriptBlock -ArgumentList $jobId, $sqlInstance, $query, $baseLogFile
}
# Start parallel backup jobs
Write-Log "Starting $Jobs parallel backup jobs"
[System.Collections.ArrayList]$jobList = @()
for ($i = 1; $i -le $Jobs; $i++) {
$job = Start-BackupJob -jobId $i -sqlInstance $SqlInstance -query $query -baseLogFile $logFile
$null = $jobList.Add($job)
Write-Log "Started backup job $i (Job ID: $($job.Id))"
Start-Sleep -Seconds 4 # Delay to stagger job starts
}
# Monitor jobs and capture output
Write-Log "Monitoring $($jobList.Count) backup jobs..."
$allJobsCompleted = $false
[System.Collections.ArrayList]$completedJobs = @()
while (-not $allJobsCompleted) {
Start-Sleep -Seconds 5
foreach ($job in $jobList) {
if ($job.Id -notin $completedJobs) {
# Check if job is no longer running
if ($job.State -eq "Completed" -or $job.State -eq "Failed" -or $job.State -eq "Stopped") {
$null = $completedJobs.Add($job.Id)
# Get all job output
$jobOutput = Receive-Job -Job $job -Keep # Use -Keep to preserve output
if ($job.State -eq "Completed") {
Write-Log "Job $($job.Id) completed successfully"
# Log all job output to main log
if ($jobOutput) {
Write-Log "=== Job $($job.Id) Output ==="
foreach ($line in $jobOutput) {
Write-Log "$line"
}
Write-Log "=== End Job $($job.Id) Output ==="
}
} else {
Write-Log "ERROR: Job $($job.Id) failed with state: $($job.State)"
if ($jobOutput) {
Write-Log "=== Job $($job.Id) Error Output ==="
foreach ($line in $jobOutput) {
Write-Log "ERROR: $line"
}
Write-Log "=== End Job $($job.Id) Error Output ==="
}
}
}
}
}
$allJobsCompleted = $completedJobs.Count -eq $jobList.Count
# Progress update
$runningCount = ($jobList | Where-Object { $_.State -eq "Running" }).Count
if ($runningCount -gt 0) {
Write-Log "Progress: $($completedJobs.Count)/$($jobList.Count) jobs completed, $runningCount still running..."
}
}
Write-Log "All backup jobs completed"
# Collect job states and outputs before cleanup for final status check
$jobResults = @{}
foreach ($job in $jobList) {
$jobOutput = Receive-Job -Job $job -Keep -ErrorAction SilentlyContinue
# Treat a non-Completed job state as a failure even if no FAILED marker appears in the output
$hasFailed = ($job.State -ne "Completed")
# Check if job output contains failure indicator
if ($jobOutput) {
foreach ($line in $jobOutput) {
if ($line -like "*JOB-*: FAILED") {
$hasFailed = $true
break
}
}
}
$jobResults[$job.Id] = @{
State = $job.State
Failed = $hasFailed
}
}
# Clean up jobs
Write-Log "Cleaning up completed jobs..."
foreach ($job in $jobList) {
try {
if ($job.State -eq "Running") {
Write-Log "WARNING: Job $($job.Id) still running, stopping it..."
Stop-Job -Job $job -Force
Start-Sleep -Seconds 2
}
Remove-Job -Job $job -Force -ErrorAction SilentlyContinue
Write-Log "Cleaned up job $($job.Id)"
} catch {
Write-Log "WARNING: Could not clean up job $($job.Id): $($_.Exception.Message)"
}
}
# Final status check using job output analysis
$failedJobIds = $jobResults.Keys | Where-Object { $jobResults[$_].Failed -eq $true }
if ($failedJobIds.Count -gt 0) {
Write-Log "ERROR: $($failedJobIds.Count) out of $($jobResults.Count) backup jobs failed"
foreach ($jobId in $failedJobIds) {
Write-Log "ERROR: Job ID $jobId failed"
}
Write-Log "CRITICAL: Backup operation failed - check errors above"
exit 1
} else {
Write-Log "SUCCESS: All $($jobResults.Count) backup jobs completed successfully"
}