Showing posts with label malayalamwiki. Show all posts
Showing posts with label malayalamwiki. Show all posts

Saturday, 10 February 2024

Adding a category using PAWS

First 

let us use SPARQL to collect the Wikipedia articles that do not yet have the category for people who died in 2023

Sample : Link

Second:

Extract the title using MS Excel


Third

Paste the article titles into PAWS as shown below

PAWS

------------

import pywikibot


def add_category_to_article(page_title, category):
    """Append a category to a Malayalam Wikipedia article if it is missing.

    Parameters:
        page_title: title of the article to edit.
        category: category name WITHOUT the namespace prefix,
            e.g. "2023-ൽ മരിച്ചവർ".
    """
    site = pywikibot.Site('ml', 'wikipedia')  # Malayalam Wikipedia
    page = pywikibot.Page(site, page_title)

    # Nothing to do for red links.
    if not page.exists():
        print(f"Page '{page_title}' does not exist.")
        return

    # BUG FIX: page.categories() yields pywikibot.Category *objects*, so the
    # original `category in page.categories()` string comparison never matched
    # and a duplicate category could be appended on every run.  Compare the
    # namespace-stripped category titles instead.
    existing = {cat.title(with_ns=False) for cat in page.categories()}
    if category in existing:
        print(f"Page '{page_title}' already has the category '{category}'.")
        return

    # Append the category wikitext ("വർഗ്ഗം" is the Malayalam category
    # namespace) and save; the edit summary is Malayalam for
    # "Adding category '<category>'".
    page.text += f"\n[[വർഗ്ഗം:{category}]]"
    page.save(f"കാറ്റഗറി ചേർക്കുന്നു '{category}' ")


def main():
    """Drive the category bot over a fixed list of article titles."""
    # Category name without the namespace prefix ("People who died in 2023").
    category = "2023-ൽ മരിച്ചവർ"

    # Titles pasted from the SPARQL/Excel step; underscores are fine,
    # Pywikibot normalises them to spaces.
    articles = [
        "എസ്ഥേർ_ഈല്ലം",
        "ദരൂഷ്_മെഹ്‌റൂജി",
    ]

    for title in articles:
        add_category_to_article(title, category)


if __name__ == "__main__":
    main()



Fourth

Run the python file.



NB: Make sure the list syntax is valid (Python allows, but does not require, a comma after the last title) and recheck the category name carefully.

It is based on the Wikidata statement "date of death".

Saturday, 27 January 2024

PAWS: Fetch page titles from the search box

import requests
from bs4 import BeautifulSoup

# Special:Search (പ്രത്യേകം:അന്വേഷണം) on Malayalam Wikipedia, up to 500
# mainspace results per page.
SEARCH_URL = "https://ml.wikipedia.org/w/index.php?title=പ്രത്യേകം:അന്വേഷണം&limit=500&offset=0&ns0=1&search=ഇസ്ലാം+ഇസ്ലാം&advancedSearch-current="

# Fetch the rendered search-results page.
resp = requests.get(SEARCH_URL)

if resp.status_code != 200:
    print(f"Failed to retrieve search results. Status code: {resp.status_code}")
else:
    # Each result title sits inside a div with this class on the
    # Special:Search results page.
    parsed = BeautifulSoup(resp.text, 'html.parser')
    for heading in parsed.find_all('div', class_='mw-search-result-heading'):
        # Print one article title per line.
        print(heading.text.strip())


PAWS: Fixing a spelling mistake (search and replace)

import pywikibot

site = pywikibot.Site('ml', 'wikipedia')  # Malayalam Wikipedia
site.login()

# replace.txt holds one page title per line.
with open('replace.txt', 'r', encoding='utf-8') as file:
    page_titles = file.read().splitlines()

# Misspelling -> correct spelling.
search_term = 'അള്ളാഹു'
replace_term = 'അല്ലാഹു'

for title in page_titles:
    page = pywikibot.Page(site, title)
    page_text = page.text

    # BUG FIX: the original tested `replace_term not in page_text`, which
    # (a) skipped pages containing BOTH spellings, leaving misspellings
    # uncorrected, and (b) saved pointless null edits on pages containing
    # neither term.  Edit only when the misspelling is actually present.
    if search_term in page_text:
        page.text = page_text.replace(search_term, replace_term)
        page.save(summary='Fixing misspelling')
    else:
        print(f'Skipping page {title} as it does not contain the misspelling.')