#!/bin/bash

# Step 1: Get HTML content of the page
html_content=$(curl -s https://zaiste.net/posts/shell-commands-rust/)

# Step 2: Extract href attribute values of "GitHub" links
github_links=$(echo "$html_content" | grep -oE '<a[^>]+>GitHub</a>' | grep -oE 'href="[^"]+"' | sed 's/href="//;s/"$//')

# Step 3: Transform github.com URLs into raw.githubusercontent.com URLs
# pointing at the Cargo.toml on the master branch
transformed_urls=()
for link in $github_links; do
  if [[ "$link" != *"github"* ]]; then
    continue
  fi
  transformed_url=$(echo "$link" | sed 's/github.com/raw.githubusercontent.com/;s#/$##')
  transformed_urls+=("$transformed_url/master/Cargo.toml")
done

echo "Transformed URLs:"
for url in "${transformed_urls[@]}"; do
  echo "$url"
done

# Step 4: Test URLs with a HEAD request and gather results
existing_urls=()
non_existing_urls=()
for url in "${transformed_urls[@]}"; do
  response=$(curl -s --head -w '%{http_code}' "$url" -o /dev/null)
  if [ "$response" -eq 200 ]; then
    existing_urls+=("$url")
  else
    non_existing_urls+=("$url")
  fi
done

# Step 5 and 6: Search for the "name" property in each Cargo.toml and categorize URLs
successful_search=()
unsuccessful_search=()
for url in "${existing_urls[@]}"; do
  toml_content=$(curl -s "$url")
  if grep -qE "\[package\]" <<< "$toml_content"; then
    # name_line=$(grep -A 1 "\[package\]" <<< "$toml_content" | grep -E '^name = "[^"]+"' | sed 's/name = "//;s/"$//')
    name_line=$(awk -F'"' '/^\[package\]/ { in_package = 1 } in_package && /name =/ { print $2; exit }' <<< "$toml_content")
    echo "name_line: ${name_line}"
    # awk prints nothing when no name line follows [package], so an empty result means failure
    if [[ -n "$name_line" ]]; then
      successful_search+=("$url $name_line")
    else
      unsuccessful_search+=("$url")
    fi
  fi
done

# Step 7: Print results
echo -e "\n\n\nExisting URLs:"
for url in "${existing_urls[@]}"; do
  echo "$url"
done

echo -e "\n\n\nNon-existing URLs:"
for url in "${non_existing_urls[@]}"; do
  echo "$url"
done

# echo -e "\n\n\nSuccessful search for 'name' property:"
echo -ne "\n\n\ncargo install"
for result in "${successful_search[@]}"; do
  # url=$(echo "$result" | cut -d' ' -f1)
  name=$(echo "$result" | cut -d' ' -f2)
  echo -n " $name"
done
# Terminate the "cargo install ..." line with a newline
echo

# echo -e "\n\n\nUnsuccessful search for 'name' property:"
# for url in "${unsuccessful_search[@]}"; do
#   echo "$url"
# done
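
# ---------------------------------------------------------------------------
# Optional manual sanity check (not part of the script's logic): verify one
# transformed URL by hand before relying on Step 4. The repository path below
# is purely illustrative, not taken from the page fetched above.
#
#   curl -sI https://raw.githubusercontent.com/ogham/exa/master/Cargo.toml | head -n 1
#
# A first line of "HTTP/2 200" (or "HTTP/1.1 200 OK") means a Cargo.toml
# exists at that path; a 404 often means the repository's default branch is
# not "master".
# ---------------------------------------------------------------------------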