Shoaib Sherazi: PKNIC scraper

PKNIC scraper

Wednesday, 7 February 2024
# main code 
import requests
from bs4 import BeautifulSoup

# URL of the page with the lookup form; each domain query is POSTed here
form_url = 'https://pk6.pknic.net.pk/pk5/lookup.PK'

# Path to the text file with domain names (read line by line, one name per line)
file_path = 'words.txt'  # Replace with your file path

# Helper: pull the value shown next to a label such as "Create Date:"
def _find_labelled_date(soup, label):
    """Return the text of the <span> that follows the <td> containing *label*.

    Every <tr> in *soup* is scanned. Within a row whose text contains
    *label*, the first <td> whose own string contains *label* is located and
    the next <span> in document order supplies the value. If several rows
    match, the last one that yields a span wins.

    Args:
        soup: Parsed BeautifulSoup document of the lookup result page.
        label: Label text to look for, e.g. ``"Create Date:"``.

    Returns:
        The stripped span text, or ``'Not found'`` when no row provides one.
    """
    value = 'Not found'
    for row in soup.find_all('tr'):
        if label not in row.get_text():
            continue
        label_cell = row.find('td', string=lambda text: text and label in text)
        if label_cell is None:
            continue
        value_span = label_cell.find_next('span')
        if value_span is not None:
            value = value_span.get_text(strip=True)
    return value


# Function to check domain status
def check_domain_status(domain, timeout=30):
    """Look up *domain* through the lookup form and describe its status.

    The domain is POSTed to ``form_url`` as the ``name`` form field and the
    returned HTML is searched for the status phrases below. The HTTP status
    code is not inspected; whatever body comes back is parsed.

    Args:
        domain: Domain name to query.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.post``.

    Returns:
        A human-readable sentence:
        - registered (page contains "Domain is Registered"), including the
          create and expire dates, each ``'Not found'`` if it is missing;
        - available (page contains "Domain not found");
        - otherwise a message that the status could not be determined.

    Raises:
        requests.exceptions.RequestException: Network failures and timeouts
            raised by ``requests.post`` propagate to the caller.
    """
    form_data = {'name': domain}
    # Without a timeout, requests waits indefinitely on an unresponsive
    # server, which would stall a whole batch of lookups.
    response = requests.post(form_url, data=form_data, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the page text once; both status checks read it.
    page_text = soup.get_text()

    if "Domain is Registered" in page_text:
        create_date = _find_labelled_date(soup, "Create Date:")
        expire_date = _find_labelled_date(soup, "Expire Date:")
        return f"Domain {domain} is registered. Create Date: {create_date}, Expire Date: {expire_date}."

    if "Domain not found" in page_text:
        return f"Domain {domain} is available for registration."

    return f"Status of domain {domain} could not be determined."

# Read domain names from the file and check their status.
# Surrounding whitespace is stripped and blank lines are skipped so that no
# empty or padded name is submitted to the lookup form.
with open(file_path, 'r') as file:
    raw_lines = file.read().splitlines()

domain_names = [name for name in (line.strip() for line in raw_lines) if name]

for domain in domain_names:
    status = check_domain_status(domain)
    print(status)
    # No delay is applied between lookups. To avoid overwhelming the server,
    # import time at the top of the file and call time.sleep(1) here.

# And this is the list-maker code, which appends .pk to each word

# Build the domain list: read words from the input file, keep those whose
# stripped length is 4 or 5 characters, append '.pk', and write them out.
input_file_path = 'input.txt'  # Replace with the path to your input file
output_file_path = 'output.txt'  # Replace with the path to your output file
try:
    with open(input_file_path, 'r') as input_file:
        # Strip each line once. The length test counts every remaining
        # character, not only letters.
        stripped_words = (line.strip() for line in input_file)
        filtered_words = [word + '.pk' for word in stripped_words if 4 <= len(word) <= 5]
    # Write the filtered words to the output file, one per line
    # (no trailing newline after the last word)
    with open(output_file_path, 'w') as output_file:
        output_file.write('\n'.join(filtered_words))
    print(f"Filtered words with '.pk' suffix have been saved to {output_file_path}")
except FileNotFoundError as e:
    # open() can raise this for the output path as well (e.g. a missing
    # parent directory); only blame the input file when it is the one that
    # could not be found.
    if e.filename == input_file_path:
        print(f"Input file '{input_file_path}' not found.")
    else:
        print(f"An error occurred: {str(e)}")
except Exception as e:
    print(f"An error occurred: {str(e)}")