#!/bin/bash
################################################################################
# sort_vault.sh - M.O.C. Vault Organization Script
################################################################################
# This script organizes markdown notes into a Method of Control (M.O.C.)
# directory structure using Gemini AI for intelligent classification.
#
# M.O.C. Structure:
#   00-Meta      - System metadata and configuration
#   10-Input     - Raw, unprocessed notes and content
#   20-Knowledge - Refined, permanent knowledge in your own words
#   30-MOCs      - Maps of Content (index/hub notes)
#   40-Archive   - Stale or outdated content
#
# Usage:
#   ./sort_vault.sh [--dry-run]
#
# Requires: bash 4+ (associative arrays), jq, curl, git.
# Environment: GEMINI_API_KEY must be set (unless --dry-run).
################################################################################

set -euo pipefail  # Exit on error, undefined variables, and pipe failures

################################################################################
# CONFIGURATION
################################################################################

# M.O.C. directory structure
MOC_DIRS=("00-Meta" "10-Input" "20-Knowledge" "30-MOCs" "40-Archive")

# Source directory containing notes to sort
SOURCE_DIR="Old_Notes"

# Special handling directory for Reina-related notes
REINA_DIR="10-Input/Reina-Notes"

# Gemini API configuration
GEMINI_MODEL="gemini-2.5-flash-preview-09-2025"
GEMINI_API_URL="https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}:generateContent"

# Rate limiting (seconds to wait between API calls)
# Free tier limit: 10 requests per minute = 1 request per 6 seconds
API_DELAY=15

# Log files
ERROR_LOG="sort_vault_errors.log"
DEBUG_LOG="sort_vault_debug.log"
SUMMARY_LOG="sort_vault_summary.log"

# Dry run mode (set via command line)
DRY_RUN=false

# System instruction for Gemini AI.
# NOTE: `read -d ''` returns non-zero at EOF, so `|| true` keeps set -e happy.
read -r -d '' SYSTEM_INSTRUCTION << 'EOF' || true
You are Blight, an experienced vault assistant.
Classify the user's Markdown note content into one of the following MOC folders:
- 10-Input: raw lectures, articles, fleeting notes, unprocessed content
- 20-Knowledge: refined, permanent concepts written in the user's own words
- 30-MOCs: index/hub notes that link to other notes, maps of content
- 40-Archive: stale, outdated, or no longer relevant content

Provide a clean, suitable filename for the new location.
Remove dates, special characters, and make it descriptive.

Respond ONLY with a valid JSON object in this exact format:
{"target_dir": "[Target MOC Folder]", "new_filename": "[Cleaned Note Title.md]"}
EOF

################################################################################
# COMMAND LINE ARGUMENTS
################################################################################

while [[ $# -gt 0 ]]; do
  case $1 in
    --dry-run)
      DRY_RUN=true
      echo "🔍 DRY RUN MODE: No files will be moved or modified."
      echo ""
      shift
      ;;
    *)
      echo "Unknown option: $1"
      echo "Usage: $0 [--dry-run]"
      exit 1
      ;;
  esac
done

################################################################################
# ERROR HANDLING
################################################################################

# Check if jq is installed (required for JSON parsing)
if ! command -v jq &> /dev/null; then
  echo "ERROR: jq is required but not installed."
  echo "Please install jq: https://jqlang.github.io/jq/download/"
  exit 1
fi

# Check if API key is set (only if not in dry run)
if [[ "$DRY_RUN" == "false" ]] && [[ -z "${GEMINI_API_KEY:-}" ]]; then
  echo "ERROR: GEMINI_API_KEY environment variable is not set."
  echo "Please set it using: export GEMINI_API_KEY='your-api-key-here'"
  exit 1
fi

# Check if source directory exists
if [[ ! -d "$SOURCE_DIR" ]]; then
  echo "ERROR: Source directory '$SOURCE_DIR' does not exist."
  echo "Please ensure the directory exists before running this script."
  exit 1
fi

# Initialize log files (skipped entirely in dry-run mode)
if [[ "$DRY_RUN" == "false" ]]; then
  > "$ERROR_LOG"    # Clear or create error log
  > "$DEBUG_LOG"    # Clear or create debug log
  > "$SUMMARY_LOG"  # Clear or create summary log
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] === Vault Organization Started ===" >> "$DEBUG_LOG"
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] Configuration:" >> "$DEBUG_LOG"
  echo "[$(date '+%Y-%m-%d %H:%M:%S')]   - Source Directory: $SOURCE_DIR" >> "$DEBUG_LOG"
  echo "[$(date '+%Y-%m-%d %H:%M:%S')]   - Gemini Model: $GEMINI_MODEL" >> "$DEBUG_LOG"
  echo "[$(date '+%Y-%m-%d %H:%M:%S')]   - API Delay: $API_DELAY seconds" >> "$DEBUG_LOG"
  echo "[$(date '+%Y-%m-%d %H:%M:%S')]   - M.O.C. Directories: ${MOC_DIRS[*]}" >> "$DEBUG_LOG"
  echo "Log files:"
  echo "  - Error log: $ERROR_LOG"
  echo "  - Debug log: $DEBUG_LOG"
  echo "  - Summary log: $SUMMARY_LOG"
  echo ""
fi

################################################################################
# UTILITY FUNCTIONS
################################################################################

# Log an error to the error log file (and mirror it into the debug log).
# Arguments: $1 - message, $2 - (optional) file the error relates to
log_error() {
  local message="$1"
  local file="${2:-}"
  if [[ "$DRY_RUN" == "false" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $message ${file:+- File: $file}" >> "$ERROR_LOG"
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $message ${file:+- File: $file}" >> "$DEBUG_LOG"
  fi
}

# Log debug information.
# Arguments: $1 - message, $2 - (optional) related file
log_debug() {
  local message="$1"
  local file="${2:-}"
  if [[ "$DRY_RUN" == "false" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] DEBUG: $message ${file:+- File: $file}" >> "$DEBUG_LOG"
  fi
}

# Log a summary entry describing one file movement.
# Arguments: $1 - action, $2 - source path, $3 - destination, $4 - status (default SUCCESS)
log_summary() {
  local action="$1"
  local source="$2"
  local destination="$3"
  local status="${4:-SUCCESS}"
  if [[ "$DRY_RUN" == "false" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $status | $action | $source -> $destination" >> "$SUMMARY_LOG"
  fi
}

# Extract subdirectory name from a file path (e.g., "Calls", "Classes", "Meetings")
# Returns empty string if file is directly in SOURCE_DIR
get_subdirectory_name() {
  local file_path="$1"
  local relative_path="${file_path#$SOURCE_DIR/}"
  # Check if there's a subdirectory
  if [[ "$relative_path" == *"/"* ]]; then
    # Extract first subdirectory name
    echo "${relative_path%%/*}"
  else
    echo ""
  fi
}

# Find all markdown files that reference a given non-markdown file
find_referencing_markdown_files() {
  local non_md_file="$1"
  local basename_only
  basename_only=$(basename "$non_md_file")
  # Search for references in markdown files (wikilinks, markdown links, or plain mentions)
  # Look for: [[filename]], [text](filename), or just the filename
  grep -rli --include="*.md" \
    -e "\[\[.*${basename_only}.*\]\]" \
    -e "(${basename_only})" \
    -e "${basename_only}" \
    "$SOURCE_DIR" 2>/dev/null || true
}

################################################################################
# STEP 1: CREATE M.O.C. DIRECTORY STRUCTURE
################################################################################

echo "=== Creating M.O.C. Directory Structure ==="
log_debug "Creating M.O.C. directory structure"

for dir in "${MOC_DIRS[@]}"; do
  if [[ ! -d "$dir" ]]; then
    if [[ "$DRY_RUN" == "false" ]]; then
      mkdir -p "$dir"
      echo "Created: $dir"
      log_debug "Created directory: $dir"
    else
      echo "[DRY RUN] Would create: $dir"
    fi
  else
    echo "Already exists: $dir"
    log_debug "Directory already exists: $dir"
  fi
done
echo ""

################################################################################
# STEP 2: INITIALIZE GIT REPOSITORY
################################################################################

echo "=== Initializing Git Repository ==="
log_debug "Initializing Git repository"

if [[ ! -d ".git" ]]; then
  if [[ "$DRY_RUN" == "false" ]]; then
    git init
    echo "Git repository initialized."
    log_debug "Git repository initialized successfully"
  else
    echo "[DRY RUN] Would initialize Git repository"
  fi
else
  echo "Git repository already exists."
  log_debug "Git repository already exists"
fi

# Stage the M.O.C. directories
if [[ "$DRY_RUN" == "false" ]]; then
  echo "Staging M.O.C. directories..."
  for dir in "${MOC_DIRS[@]}"; do
    # Create a .gitkeep file to ensure empty directories are tracked
    touch "${dir}/.gitkeep"
    git add "${dir}/.gitkeep"
  done
else
  echo "[DRY RUN] Would stage M.O.C. directories"
fi
echo ""

################################################################################
# STEP 3: BUILD FILE REFERENCE MAP FOR NON-MARKDOWN FILES
################################################################################

echo "=== Building reference map for non-markdown files ==="
log_debug "Building reference map for non-markdown files"

# Declare associative array to map non-markdown files to their referencing markdown files
declare -A non_md_reference_map

# Find all non-markdown files (NUL-delimited to survive spaces in names)
while IFS= read -r -d '' non_md_file; do
  # Find markdown files that reference this non-markdown file
  referencing_files=$(find_referencing_markdown_files "$non_md_file")
  if [[ -n "$referencing_files" ]]; then
    # Store the first referencing file (we'll move the asset with its primary reference)
    first_ref=$(echo "$referencing_files" | head -n 1)
    non_md_reference_map["$non_md_file"]="$first_ref"
    echo "  Found reference: $(basename "$non_md_file") → $(basename "$first_ref")"
    log_debug "Found reference" "$(basename "$non_md_file") -> $(basename "$first_ref")"
  else
    # No references found - will move to 40-Archive by default
    non_md_reference_map["$non_md_file"]="ORPHAN"
    echo "  No reference: $(basename "$non_md_file") (will archive)"
    log_debug "No reference found for non-markdown file" "$(basename "$non_md_file")"
  fi
done < <(find "$SOURCE_DIR" -type f ! -name "*.md" -print0)

log_debug "Reference map complete: ${#non_md_reference_map[@]} non-markdown files found"
echo ""

################################################################################
# STEP 4: PROCESS MARKDOWN NOTES FROM OLD_NOTES DIRECTORY
################################################################################

echo "=== Processing Markdown Notes from $SOURCE_DIR ==="

# Counter for processed files
processed_count=0
reina_count=0
api_count=0
skipped_count=0

# Associative array to track where markdown files were moved
declare -A markdown_destinations

# Find all .md files in the source directory
while IFS= read -r -d '' file; do
  # Get the filename without path
  filename=$(basename "$file")

  # Extract subdirectory name if file is in a subdirectory
  subdirectory=$(get_subdirectory_name "$file")

  echo "Processing: $filename"
  if [[ -n "$subdirectory" ]]; then
    echo "  → From subdirectory: $subdirectory"
  fi

  ########################################################################
  # STEP 4A: CHECK FOR REINA-RELATED NOTES
  ########################################################################

  # Case-insensitive check for "Reina" in filename
  if [[ "$filename" =~ [Rr][Ee][Ii][Nn][Aa] ]]; then
    echo "  → Reina-related note detected"
    log_debug "Reina-related note detected" "$filename"

    if [[ "$DRY_RUN" == "false" ]]; then
      # Create Reina directory if it doesn't exist
      base_reina_dir="$REINA_DIR"
      if [[ -n "$subdirectory" ]]; then
        base_reina_dir="${REINA_DIR}/${subdirectory}"
      fi
      if [[ ! -d "$base_reina_dir" ]]; then
        mkdir -p "$base_reina_dir"
        echo "  → Created directory: $base_reina_dir"
        log_debug "Created Reina subdirectory: $base_reina_dir"
      fi

      # Move file to Reina directory.
      # BUGFIX: was "$(unknown)" (a command substitution of a nonexistent
      # command, which aborts under set -e); the intended value is the
      # original filename.
      target_path="${base_reina_dir}/${filename}"
      mv "$file" "$target_path"
      echo "  ✓ Moved to: $target_path"
      log_summary "REINA_MOVE" "$file" "$target_path"

      # Track destination for non-markdown file handling
      markdown_destinations["$file"]="$target_path"
    else
      base_reina_dir="$REINA_DIR"
      if [[ -n "$subdirectory" ]]; then
        base_reina_dir="${REINA_DIR}/${subdirectory}"
      fi
      echo "  [DRY RUN] Would move to: ${base_reina_dir}/${filename}"
      markdown_destinations["$file"]="${base_reina_dir}/${filename}"
    fi

    reina_count=$((reina_count + 1))
    processed_count=$((processed_count + 1))
    echo ""
    continue
  fi

  ########################################################################
  # STEP 4B: CLASSIFY USING GEMINI API
  ########################################################################

  echo "  → Classifying with Gemini AI..."

  if [[ "$DRY_RUN" == "true" ]]; then
    echo "  [DRY RUN] Would classify and move file"
    # In dry run, assume it goes to 20-Knowledge for testing
    markdown_destinations["$file"]="20-Knowledge/${filename}"
    api_count=$((api_count + 1))
    processed_count=$((processed_count + 1))
    echo ""
    continue
  fi

  # Prepare API request payload using jq to ensure proper JSON formatting
  api_payload=$(jq -n \
    --arg file_content "$(cat "$file")" \
    --arg system_instruction "$SYSTEM_INSTRUCTION" \
    '{
      "contents": [{
        "parts": [{
          "text": ("Classify this note and provide the target directory and new filename.\n\nNote content:\n" + $file_content)
        }]
      }],
      "systemInstruction": {
        "parts": [{
          "text": $system_instruction
        }]
      },
      "generationConfig": {
        "temperature": 0.2,
        "topK": 40,
        "topP": 0.95,
        "maxOutputTokens": 1024,
        "responseMimeType": "application/json"
      }
    }')

  # Call Gemini API.
  # BUGFIX: under `set -e`, a failing command substitution in a plain
  # assignment aborts the script before `$?` can be inspected, so the old
  # `curl_exit_code=$?` check was dead code. Capture the status with `||`.
  curl_exit_code=0
  api_response=$(curl -s --max-time 30 -X POST "${GEMINI_API_URL}?key=${GEMINI_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "$api_payload" 2>&1) || curl_exit_code=$?

  if [[ $curl_exit_code -ne 0 ]]; then
    echo "  ✗ curl failed with exit code: $curl_exit_code"
    echo "  → Skipping file: $filename"
    log_error "curl failed with exit code $curl_exit_code" "$filename"
    log_summary "API_CLASSIFY" "$file" "SKIPPED" "CURL_ERROR"
    skipped_count=$((skipped_count + 1))
    echo ""
    continue
  fi

  # Rate limiting to avoid hitting API limits
  sleep "$API_DELAY"

  # Check for API errors
  if echo "$api_response" | jq -e '.error' > /dev/null 2>&1; then
    error_msg=$(echo "$api_response" | jq -r '.error.message // "Unknown error"')
    echo "  ✗ API Error: $error_msg"
    echo "  → Skipping file: $filename"
    log_error "API Error: $error_msg" "$filename"
    log_summary "API_CLASSIFY" "$file" "SKIPPED" "API_ERROR"
    skipped_count=$((skipped_count + 1))
    echo ""
    continue
  fi

  ########################################################################
  # STEP 4C: PARSE JSON RESPONSE AND MOVE FILE
  ########################################################################

  # Extract the text content from the API response
  response_text=$(echo "$api_response" | jq -r '.candidates[0].content.parts[0].text // empty')

  if [[ -z "$response_text" ]]; then
    echo "  ✗ Failed to get classification from API"
    echo "  → Skipping file: $filename"
    log_error "Empty API response" "$filename"
    log_summary "API_CLASSIFY" "$file" "SKIPPED" "EMPTY_RESPONSE"
    skipped_count=$((skipped_count + 1))
    echo ""
    continue
  fi

  log_debug "API response received" "$filename: $response_text"

  # Parse JSON from response
  target_dir=$(echo "$response_text" | jq -r '.target_dir // empty')
  new_filename=$(echo "$response_text" | jq -r '.new_filename // empty')

  # Validate parsed JSON
  if [[ -z "$target_dir" ]] || [[ -z "$new_filename" ]]; then
    echo "  ✗ Invalid JSON response from API"
    echo "  Response: $response_text"
    echo "  → Skipping file: $filename"
    log_error "Invalid JSON response: $response_text" "$filename"
    log_summary "API_CLASSIFY" "$file" "SKIPPED" "INVALID_JSON"
    skipped_count=$((skipped_count + 1))
    echo ""
    continue
  fi

  log_debug "Parsed classification" "$filename -> $target_dir/$new_filename"

  # Validate target directory exists in M.O.C. structure
  if [[ ! -d "$target_dir" ]]; then
    echo "  ✗ Invalid target directory: $target_dir"
    echo "  → Skipping file: $filename"
    log_error "Invalid target directory: $target_dir" "$filename"
    log_summary "API_CLASSIFY" "$file" "SKIPPED" "INVALID_DIR"
    skipped_count=$((skipped_count + 1))
    echo ""
    continue
  fi

  # Ensure new filename has .md extension
  if [[ ! "$new_filename" =~ \.md$ ]]; then
    new_filename="${new_filename}.md"
  fi

  # Create target path, adding subdirectory if needed
  if [[ -n "$subdirectory" ]]; then
    target_dir="${target_dir}/${subdirectory}"
    # Create subdirectory if needed
    if [[ ! -d "$target_dir" ]]; then
      mkdir -p "$target_dir"
      echo "  → Created ${subdirectory} subdirectory"
    fi
  fi

  target_path="${target_dir}/${new_filename}"

  # Handle filename conflicts
  if [[ -f "$target_path" ]]; then
    # Append timestamp to make filename unique
    timestamp=$(date +"%Y%m%d-%H%M%S")
    new_filename="${new_filename%.md}-${timestamp}.md"
    target_path="${target_dir}/${new_filename}"
    echo "  ⚠ File exists, using unique name: $new_filename"
    log_debug "Filename conflict resolved" "$filename -> $new_filename"
  fi

  # Move the file
  mv "$file" "$target_path"
  echo "  ✓ Classified as: $target_dir"
  echo "  ✓ Moved to: $target_path"
  log_summary "API_CLASSIFY" "$file" "$target_path"

  # Track destination for non-markdown file handling
  markdown_destinations["$file"]="$target_path"

  api_count=$((api_count + 1))
  processed_count=$((processed_count + 1))
  echo ""
done < <(find "$SOURCE_DIR" -type f -name "*.md" -print0)

echo "=== Markdown Processing Complete ==="
echo "Total markdown files processed: $processed_count"
echo "  - Reina notes: $reina_count"
echo "  - API classified: $api_count"
echo "  - Skipped: $skipped_count"
echo ""

################################################################################
# STEP 5: PROCESS NON-MARKDOWN FILES
################################################################################

echo "=== Processing Non-Markdown Files ==="

non_md_moved=0
non_md_archived=0

# Resolve a destination path that does not clobber an existing file.
# If the candidate "$1/$2" already exists, append a timestamp before the
# extension (or at the end for extensionless files). Prints the final path.
resolve_unique_path() {
  local dest_dir="$1"
  local dest_name="$2"
  local candidate="${dest_dir}/${dest_name}"
  if [[ -f "$candidate" ]]; then
    local timestamp base_name extension
    timestamp=$(date +"%Y%m%d-%H%M%S")
    base_name="${dest_name%.*}"
    extension="${dest_name##*.}"
    if [[ "$base_name" == "$extension" ]]; then
      # No extension
      candidate="${dest_dir}/${dest_name}-${timestamp}"
    else
      candidate="${dest_dir}/${base_name}-${timestamp}.${extension}"
    fi
  fi
  echo "$candidate"
}

for non_md_file in "${!non_md_reference_map[@]}"; do
  referencing_md="${non_md_reference_map[$non_md_file]}"
  basename_file=$(basename "$non_md_file")

  echo "Processing: $basename_file"

  if [[ "$referencing_md" == "ORPHAN" ]]; then
    # No references found - move to Archive
    target_dir="40-Archive"
    target_path="${target_dir}/${basename_file}"
    echo "  → No references found, archiving"

    if [[ "$DRY_RUN" == "false" ]]; then
      # Handle filename conflicts
      unique_path=$(resolve_unique_path "$target_dir" "$basename_file")
      if [[ "$unique_path" != "$target_path" ]]; then
        echo "  ⚠ File exists, using unique name"
        log_debug "Non-markdown filename conflict resolved" "$basename_file"
      fi
      target_path="$unique_path"
      mv "$non_md_file" "$target_path"
      echo "  ✓ Moved to: $target_path"
      log_summary "ARCHIVE_ORPHAN" "$non_md_file" "$target_path"
    else
      echo "  [DRY RUN] Would move to: $target_path"
    fi
    non_md_archived=$((non_md_archived + 1))

  else
    # Move to same directory as the referencing markdown file
    if [[ -n "${markdown_destinations[$referencing_md]:-}" ]]; then
      md_dest="${markdown_destinations[$referencing_md]}"
      target_dir=$(dirname "$md_dest")

      # Check if the non-markdown file itself is from a subdirectory
      # and preserve that structure
      non_md_subdirectory=$(get_subdirectory_name "$non_md_file")
      if [[ -n "$non_md_subdirectory" ]]; then
        # Check if referencing markdown is from the same subdirectory
        md_subdirectory=$(get_subdirectory_name "$referencing_md")
        if [[ "$non_md_subdirectory" == "$md_subdirectory" ]]; then
          # Already in same subdirectory structure, target_dir is correct
          :
        else
          # Markdown is from different subdirectory, but preserve asset's subdirectory
          base_target=$(dirname "$md_dest")
          # Walk up from the markdown destination until we reach a M.O.C.
          # root folder (or give up at "."), then graft the asset's own
          # subdirectory onto that root.
          while [[ "$(basename "$base_target")" != "00-Meta" ]] && \
                [[ "$(basename "$base_target")" != "10-Input" ]] && \
                [[ "$(basename "$base_target")" != "20-Knowledge" ]] && \
                [[ "$(basename "$base_target")" != "30-MOCs" ]] && \
                [[ "$(basename "$base_target")" != "40-Archive" ]] && \
                [[ "$base_target" != "." ]]; do
            base_target=$(dirname "$base_target")
          done
          if [[ "$base_target" != "." ]]; then
            target_dir="${base_target}/${non_md_subdirectory}"
            if [[ ! -d "$target_dir" ]]; then
              mkdir -p "$target_dir"
              echo "  → Created ${non_md_subdirectory} subdirectory"
            fi
          fi
        fi
      fi

      target_path="${target_dir}/${basename_file}"
      echo "  → Referenced by: $(basename "$referencing_md")"
      echo "  → Destination: $target_dir"

      if [[ "$DRY_RUN" == "false" ]]; then
        # Handle filename conflicts
        unique_path=$(resolve_unique_path "$target_dir" "$basename_file")
        if [[ "$unique_path" != "$target_path" ]]; then
          echo "  ⚠ File exists, using unique name"
        fi
        target_path="$unique_path"
        mv "$non_md_file" "$target_path"
        echo "  ✓ Moved to: $target_path"
        log_summary "MOVE_WITH_REF" "$non_md_file" "$target_path"
      else
        echo "  [DRY RUN] Would move to: $target_path"
      fi
      non_md_moved=$((non_md_moved + 1))

    else
      # Referencing markdown file wasn't processed - archive for safety
      echo "  ⚠ Referenced markdown file wasn't moved, archiving for safety"
      target_dir="40-Archive"
      target_path="${target_dir}/${basename_file}"
      if [[ "$DRY_RUN" == "false" ]]; then
        # BUGFIX: this branch previously moved the file with no conflict
        # handling and could silently overwrite an archived file with the
        # same name; resolve conflicts like the other branches do.
        unique_path=$(resolve_unique_path "$target_dir" "$basename_file")
        if [[ "$unique_path" != "$target_path" ]]; then
          echo "  ⚠ File exists, using unique name"
        fi
        target_path="$unique_path"
        mv "$non_md_file" "$target_path"
        echo "  ✓ Moved to: $target_path"
        log_summary "ARCHIVE_UNPROCESSED_REF" "$non_md_file" "$target_path"
      else
        echo "  [DRY RUN] Would move to: $target_path"
      fi
      non_md_archived=$((non_md_archived + 1))
    fi
  fi
  echo ""
done

echo "=== Non-Markdown Processing Complete ==="
echo "Total non-markdown files: $((non_md_moved + non_md_archived))"
echo "  - Moved with references: $non_md_moved"
echo "  - Archived (no references): $non_md_archived"
echo ""

################################################################################
# STEP 6: CLEANUP AND GIT COMMIT
################################################################################

if [[ "$DRY_RUN" == "false" ]]; then
  echo "=== Cleaning Up ==="
  log_debug "Starting cleanup process"

  # Remove the Old_Notes directory tree recursively if it's empty
  if [[ -d "$SOURCE_DIR" ]]; then
    # Try to remove all empty directories recursively
    find "$SOURCE_DIR" -type d -empty -delete 2>/dev/null || true

    # BUGFIX: `find -empty -delete` may already have removed $SOURCE_DIR
    # itself; an unconditional `rmdir` would then fail and abort the script
    # under `set -e`. Re-check that the directory still exists first.
    if [[ ! -d "$SOURCE_DIR" ]]; then
      echo "Removed empty directory: $SOURCE_DIR"
      log_debug "Removed empty source directory: $SOURCE_DIR"
    elif [[ -z "$(ls -A "$SOURCE_DIR" 2>/dev/null)" ]]; then
      rmdir "$SOURCE_DIR"
      echo "Removed empty directory: $SOURCE_DIR"
      log_debug "Removed empty source directory: $SOURCE_DIR"
    else
      echo "Warning: $SOURCE_DIR is not empty. Manual cleanup may be required."
      echo "Remaining files:"
      remaining_files=$(find "$SOURCE_DIR" -type f)
      echo "$remaining_files"
      log_error "Source directory not empty after processing" "$SOURCE_DIR"
      log_debug "Remaining files in source directory: $remaining_files"
    fi
  fi
  echo ""

  echo "=== Committing Changes to Git ==="
  log_debug "Committing changes to Git"

  # Stage all changes
  git add -A
  log_debug "Staged all changes"

  # Create commit (git diff --cached --quiet exits 0 when nothing is staged)
  if git diff --cached --quiet; then
    echo "No changes to commit."
    log_debug "No changes to commit"
  else
    git commit -m "Vault initialized and initial files sorted."
    echo "Changes committed successfully."
    log_debug "Git commit created successfully"
  fi

  echo ""
  echo "=== Vault Organization Complete ==="
  echo "Your notes have been organized into the M.O.C. structure."
  echo "Use 'git log' to see the commit history."

  if [[ $skipped_count -gt 0 ]]; then
    echo ""
    echo "⚠ Warning: $skipped_count file(s) were skipped due to errors."
    echo "Check the error log for details: $ERROR_LOG"
  fi

  # Write final summary to debug log
  log_debug "=== Final Summary ==="
  log_debug "Total markdown files processed: $processed_count"
  log_debug "  - Reina notes: $reina_count"
  log_debug "  - API classified: $api_count"
  log_debug "  - Skipped: $skipped_count"
  log_debug "Total non-markdown files: $((non_md_moved + non_md_archived))"
  log_debug "  - Moved with references: $non_md_moved"
  log_debug "  - Archived (no references): $non_md_archived"
  log_debug "=== Vault Organization Completed ==="
else
  echo "=== Dry Run Complete ==="
  echo "No files were moved or modified."
  echo "Run without --dry-run to apply changes."
fi