Saturday 10 February 2024

Adding category using PAWS

First

Let us collect the Wikipedia articles that lack the category for people who died in 2023, using SPARQL.

Sample : Link
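For reference, a minimal sketch of such a query run from Python (an assumption on my part; the linked sample may differ in detail). It lists Malayalam Wikipedia articles whose Wikidata item has a date of death (P570) in 2023; the bot script below then skips any page that already carries the category.

import requests

# A sketch, not the exact linked query: list ml.wikipedia articles
# whose Wikidata item has a date of death (P570) in 2023.
query = """
SELECT ?article WHERE {
  ?item wdt:P570 ?dod .
  FILTER(YEAR(?dod) = 2023)
  ?article schema:about ?item ;
           schema:isPartOf <https://ml.wikipedia.org/> .
}
"""

response = requests.get(
    "https://query.wikidata.org/sparql",
    params={"query": query, "format": "json"},
    headers={"User-Agent": "category-gap-check/0.1 (example script)"},
)
for row in response.json()["results"]["bindings"]:
    print(row["article"]["value"])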

Second:

Extract the titles using MS Excel.
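If Excel is not at hand, the same extraction can be done in Python; a small sketch, assuming the query output gives full article URLs:

from urllib.parse import unquote

# A sketch: turn article URLs from the query output into page titles.
urls = [
    "https://ml.wikipedia.org/wiki/എസ്ഥേർ_ഈല്ലം",  # example row from the query output
]
titles = [unquote(url.rsplit("/wiki/", 1)[-1]) for url in urls]
print(titles)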


Third

Paste the article titles into the PAWS script as given below.

PAWS

------------

import pywikibot


def add_category_to_article(page_title, category):
    site = pywikibot.Site('ml', 'wikipedia')  # Malayalam Wikipedia
    page = pywikibot.Page(site, page_title)

    # Check if the page exists
    if not page.exists():
        print(f"Page '{page_title}' does not exist.")
        return

    # Check if the page already has the category
    # (page.categories() yields Category objects, so compare titles)
    if any(cat.title(with_ns=False) == category for cat in page.categories()):
        print(f"Page '{page_title}' already has the category '{category}'.")
        return

    # Append the category link and save with an edit summary
    page.text += f"\n[[വർഗ്ഗം:{category}]]"
    page.save(f"Adding category '{category}'")


def main():
    articles = [
        "എസ്ഥേർ_ഈല്ലം",
        "ദരൂഷ്_മെഹ്‌റൂജി"
    ]

    category = "2023-ൽ മരിച്ചവർ"

    for article in articles:
        add_category_to_article(article, category)


if __name__ == "__main__":
    main()



Fourth

Run the Python file.
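In the PAWS terminal this can look like the following (the file name add_category.py is only an example):

@PAWS:~$ python3 add_category.py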



NB: Recheck the category name carefully before running. (A trailing comma after the last title is actually harmless in a Python list; what matters is that every title and the category are spelled exactly as on the wiki.)

The article list is based on the Wikidata statement date of death (P570).

Saturday 27 January 2024

PAWS: Fetching page titles from the search results

import requests
from bs4 import BeautifulSoup

search_url = "https://ml.wikipedia.org/w/index.php?title=പ്രത്യേകം:അന്വേഷണം&limit=500&offset=0&ns0=1&search=ഇസ്ലാം+ഇസ്ലാം&advancedSearch-current="

# Send a GET request to the search URL
response = requests.get(search_url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the elements that contain article titles
    title_elements = soup.find_all('div', class_='mw-search-result-heading')

    # Extract and print the article titles
    for title_element in title_elements:
        article_title = title_element.text.strip()
        print(article_title)
else:
    print(f"Failed to retrieve search results. Status code: {response.status_code}")


PAWS: Fixing a spelling mistake (search and replace)

import pywikibot

site = pywikibot.Site('ml', 'wikipedia')  # Malayalam Wikipedia
site.login()

# Read page titles from replace.txt (one title per line)
with open('replace.txt', 'r', encoding='utf-8') as file:
    page_titles = file.read().splitlines()

# Search and replace for each page title
search_term = 'അള്ളാഹു'
replace_term = 'അല്ലാഹു'

for title in page_titles:
    page = pywikibot.Page(site, title)
    page_text = page.text

    # Replace only when the misspelling actually occurs on the page
    if search_term in page_text:
        page.text = page_text.replace(search_term, replace_term)
        page.save(summary='Fixing misspelling')
    else:
        print(f'Skipping page {title}: it does not contain the search term.')
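Before letting the bot save, it can help to preview one change. A minimal sketch using Python's standard difflib (the page title here is hypothetical):

import difflib
import pywikibot

site = pywikibot.Site('ml', 'wikipedia')
page = pywikibot.Page(site, 'ഉദാഹരണം')  # hypothetical page title
old_text = page.text
new_text = old_text.replace('അള്ളാഹു', 'അല്ലാഹു')

# Print a unified diff of the proposed change without saving anything
for line in difflib.unified_diff(old_text.splitlines(), new_text.splitlines(), lineterm=''):
    print(line)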


Friday 26 January 2024

Edit article using PAWS

Bot Editing in Wikipedia

 

1. Use PAWS:

https://hub-paws.wmcloud.org

2. Log in.

3. You need two things: a terminal and a text file (for example, Test.txt).

4. Enter the wikitext in the Test.txt file

(Example:

{{-start-}}
'''Article Title'''
Introduction of the article

==History==
Enter the content here

{{-stop-}}
)

 

5. Come back to the terminal and log in to your wiki, for example:

@PAWS:~$ pwb.py login -lang:ml -family:wikipedia

6. Then execute your test.txt file as given below:

pwb.py pagefromfile -file:/home/paws/test.txt -lang:ml -family:wikipedia -force

 

With the -force option this also works on an article that already exists: pagefromfile then overwrites the current page content (without -force, existing pages are skipped).

 ----------------------------Part 2-------------------

Adding Category



import pywikibot


def add_category_to_article(page_title, category):
    site = pywikibot.Site('ml', 'wikipedia')  # Malayalam Wikipedia
    page = pywikibot.Page(site, page_title)

    # Check if the page exists
    if not page.exists():
        print(f"Page '{page_title}' does not exist.")
        return

    # Check if the page already has the category
    # (page.categories() yields Category objects, so compare titles)
    if any(cat.title(with_ns=False) == category for cat in page.categories()):
        print(f"Page '{page_title}' already has the category '{category}'.")
        return

    # Append the category link and save with an edit summary
    page.text += f"\n[[വർഗ്ഗം:{category}]]"
    page.save(f"Adding category '{category}' to the page")


def main():
    articles = [
        "അഡിനോഫൈബ്രോമ",
        "അഡിനോമയോസിസ്",
        "ഒവേറിയൻ_ഫൈബ്രോമ",
        "ഒവേറിയൻ_സിസ്റ്റാഡിനോമ",
        "ഒവേറിയൻ_സീറസ്_സിസ്റ്റാഡിനോമ",
        "ഓപ്സോമെനോറിയ",
        "ഗർഭാശയ_പോളിപ്പ്",
        "ജനനേന്ദ്രിയ_അരിമ്പാറ",
        "ജയന്റ്_കോണ്ടൈലോമാ_അക്യൂമിനേറ്റം",
        "ജേം_സെൽ_ട്യൂമർ",
        "ജോൺ_സ്പിയേഴ്‌സ്",
        "മയോമ",
        "വജൈനൽ_യീസ്റ്റ്_ഇൻഫെക്ഷൻ"
    ]

    category = "രോഗങ്ങൾ"

    for article in articles:
        add_category_to_article(article, category)


if __name__ == "__main__":
    main()

Monday 15 January 2024

Python query: Wikidata items with an English description but no Malayalam label

import requests
import csv


def get_missing_labels(limit=10000):
    # Define the Wikidata endpoint and SPARQL query
    endpoint = "https://query.wikidata.org/sparql"
    query = f"""
    SELECT ?item ?itemLabel
    WHERE {{
      ?item wdt:P31 wd:Q202444;  # Instance of "given name" (Q202444)
            schema:description ?description.
      FILTER(LANG(?description) = "en")  # English description exists
      FILTER NOT EXISTS {{
        ?item rdfs:label ?malayalamLabel.
        FILTER(LANG(?malayalamLabel) = "ml")  # Malayalam label does not exist
      }}
      # The label service is needed so that ?itemLabel is actually bound
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
    }}
    LIMIT {limit}
    """

    # Set headers for the request
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'application/json',
    }

    # Set parameters for the request
    params = {
        'query': query,
        'format': 'json'
    }

    # Make the request to the Wikidata Query Service
    response = requests.get(endpoint, headers=headers, params=params)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        data = response.json().get('results', {}).get('bindings', [])

        # Create a list to store the results
        results = []
        for item in data:
            entity_id = item.get('item', {}).get('value', '').split('/')[-1]
            english_label = item.get('itemLabel', {}).get('value', '')
            results.append({'entity_id': entity_id, 'english_label': english_label})

        return results
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None


def write_to_csv(data, filename='missing_labels_male_given_name.csv'):
    # Write the results to a CSV file
    with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
        fieldnames = ['entity_id', 'english_label']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)


if __name__ == "__main__":
    # Run the function to get items with missing labels
    results = get_missing_labels(limit=10000)

    # Check if there are results
    if results:
        # Print the results
        print("Items with an English description but no Malayalam label (limited to 10000 results):")
        for result in results:
            print(f"{result['entity_id']}: {result['english_label']}")

        # Write the results to a CSV file
        write_to_csv(results, filename='missing_labels_male_given_name.csv')
        print("\nResults have been saved to missing_labels_male_given_name.csv.")
    else:
        print("No results obtained.")