Sunday 29 September 2024

PAWS in Wikisource

Add page numbers

import pywikibot

# Set the site to Malayalam Wikisource
site = pywikibot.Site('ml', 'wikisource')

# Loop through pages 16 to 30
for i in range(16, 31):
    # Set the correct page title
    page_title = f'താൾ:സുവിശേഷം-ഭാഷാഗാനം.pdf/{i}'
    page = pywikibot.Page(site, page_title)

    # Set the content as the incrementing page number (starting from 12)
    page_number = i - 4  # This will give 12 for page 16
    content = f'<div style="text-align: center;">{page_number}</div>'

    # Check if the page exists
    if page.exists():
        # If the page exists, add content at the top
        new_content = content + "\n" + page.text
        page.text = new_content
        page.save(f"Added page number {page_number} at the top center")
    else:
        # If the page does not exist, create it with the content
        page.text = content
        page.save(f"Created page and added page number {page_number} at the top center")
        print(f"Page '{page_title}' was created.")

Sunday 9 June 2024

PAWS and Commons

File uploading to Commons



Steps

Step 1

Log in to PAWS.

Put all the files you want to upload to Commons in one folder. For example, here the folder name is Commons; I uploaded all the PDF files there.

Then create a Python file in the same folder. I saved it as upload_image.py (you can use your own name).

Step 2

Paste the following code

import os
import pywikibot
from pywikibot.specialbots import UploadRobot

# Define the site
site = pywikibot.Site('commons', 'commons')

# Directory containing the files to upload
directory = '/home/paws/Commons/'

# List of files to upload
files_to_upload = [
    '1 (2).pdf',
    '1 (5).pdf'
]

# Iterate over the files and upload each one
for i, filename in enumerate(files_to_upload):
    file_path = os.path.join(directory, filename)

    # Generate the target filename
    target_filename = f'climate_worksheet_{i+1}.pdf'

    # Description of the file
    description = '''== {{int:filedesc}} ==
{{Information
|description={{en|1=Climate worksheet done by Ali K3 is a remix of the image Climate change-environment.jpg by user:U3196787 (CC BY-SA 4.0 / CC0). Licensed CC BY-SA 4.0.}}
|source={{own}}
|author=[[User:Akbarali|Akbarali]]
|date=2024-06-09
|permission=
|other versions=
}}

== {{int:license-header}} ==
{{self|cc-by-sa-4.0}}

[[Category:Climate Educational worksheets]]
[[Category:Open Remix: Exploring Knowledge and Cultural Creativity]]
[[Category:Map worksheets]]
[[Category:Educational resources]]
[[Category:Humanities]]
'''

    # Create an instance of the upload bot
    bot = UploadRobot(url=[file_path],
                      description=description,
                      use_filename=target_filename,
                      keep_filename=True,
                      verify_description=False,
                      target_site=site)

    # Run the upload bot
    bot.run()

Step 3
Open the terminal and enter the following:

@PAWS:~/Commons$ python upload_image.py  



NB: Make sure the names in files_to_upload match the uploaded files exactly, and give each target file a meaningful name.
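If you would rather not hard-code the list, here is a minimal sketch that picks up every PDF in the folder (assuming the same directory as above) and could replace the files_to_upload list:

import os

directory = '/home/paws/Commons/'

# Assumption: upload every PDF found in the folder, sorted by name
files_to_upload = sorted(f for f in os.listdir(directory)
                         if f.lower().endswith('.pdf'))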

Saturday 10 February 2024

Adding a category using PAWS

First

Using SPARQL, let us collect the Wikipedia articles about people who died in 2023 that lack the corresponding category.

Sample: Link
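The linked sample query is not reproduced here, but a minimal sketch of such a query, run from Python against the Wikidata Query Service, might look like this (P570 is the 'date of death' property; the sitelink pattern and the 500-item cap are assumptions):

import requests
from urllib.parse import unquote

# Hypothetical sketch: Malayalam Wikipedia articles about people who died in 2023.
# Filtering out articles that already carry the category is done afterwards.
endpoint = "https://query.wikidata.org/sparql"
query = """
SELECT ?article WHERE {
  ?person wdt:P570 ?death .                      # date of death (P570)
  FILTER(YEAR(?death) = 2023)
  ?article schema:about ?person ;
           schema:isPartOf <https://ml.wikipedia.org/> .
}
LIMIT 500
"""
response = requests.get(endpoint,
                        params={'query': query, 'format': 'json'},
                        headers={'User-Agent': 'PAWS category helper'})
for row in response.json()['results']['bindings']:
    # Sitelink URLs are percent-encoded, so decode them back to plain titles
    print(unquote(row['article']['value'].split('/wiki/')[-1]))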

Second

Extract the titles using MS Excel.


Third

Paste the article titles into the script in PAWS, as given below.

PAWS

------------

import pywikibot

def add_category_to_article(page_title, category):
    site = pywikibot.Site('ml', 'wikipedia')  # Malayalam Wikipedia
    page = pywikibot.Page(site, page_title)

    # Check if the page exists
    if not page.exists():
        print(f"Page '{page_title}' does not exist.")
        return

    # Check if the page already has the category
    # (page.categories() yields Category objects, so compare by title)
    existing_categories = [cat.title(with_ns=False) for cat in page.categories()]
    if category in existing_categories:
        print(f"Page '{page_title}' already has the category '{category}'.")
        return

    # Add the category to the page
    page.text += f"\n[[വർഗ്ഗം:{category}]]"
    page.save(f"Adding category '{category}'")

def main():
    articles = [
        "എസ്ഥേർ_ഈല്ലം",
        "ദരൂഷ്_മെഹ്‌റൂജി"
    ]

    category = "2023-ൽ മരിച്ചവർ"

    for article in articles:
        add_category_to_article(article, category)

if __name__ == "__main__":
    main()



Fourth

Run the Python file.
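For example, if you saved the script as add_category.py (a hypothetical name):

@PAWS:~$ python add_category.py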



NB: A trailing comma after the last title is harmless in Python, but make sure every title is quoted and separated by commas, and recheck the category name carefully.

The selection is based on the Wikidata statement 'date of death' (P570).

Saturday 27 January 2024

PAWS: Fetch pages from the search box

import requests
from bs4 import BeautifulSoup

search_url = "https://ml.wikipedia.org/w/index.php?title=പ്രത്യേകം:അന്വേഷണം&limit=500&offset=0&ns0=1&search=ഇസ്ലാം+ഇസ്ലാം&advancedSearch-current="

# Send a GET request to the search URL
response = requests.get(search_url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the elements that contain article titles
    title_elements = soup.find_all('div', class_='mw-search-result-heading')

    # Extract and print the article titles
    for title_element in title_elements:
        article_title = title_element.text.strip()
        print(article_title)
else:
    print(f"Failed to retrieve search results. Status code: {response.status_code}")


PAWS: Fixing a spelling mistake (replace)

import pywikibot

site = pywikibot.Site('ml', 'wikipedia')  # Malayalam Wikipedia
site.login()

# Read page titles from replace.txt
with open('replace.txt', 'r', encoding='utf-8') as file:
    page_titles = file.read().splitlines()

# Search and replace for each page title
search_term = 'അള്ളാഹു'
replace_term = 'അല്ലാഹു'

for title in page_titles:
    page = pywikibot.Page(site, title)
    page_text = page.text

    # Replace only when the misspelling is actually present
    if search_term in page_text:
        page.text = page_text.replace(search_term, replace_term)
        page.save(summary='Fixing misspelling')
    else:
        print(f'Skipping page {title}: the misspelling was not found.')
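Here replace.txt is expected to contain one page title per line, for example (hypothetical titles):

ഇസ്ലാം
ഖുർആൻ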


Friday 26 January 2024

Edit an article using PAWS

Bot editing in Wikipedia

1. Use PAWS: https://hub-paws.wmcloud.org
2. Log in.
3. You need two things: a terminal and a text file (for example, test.txt).
4. Enter the wikitext in the test.txt file, for example:

{{-start-}}
'''Article Title'''

Introduction of the article.

==History==
Enter the content here.

{{-stop-}}

5. Come back to the terminal and log in to your wiki, for example:

@PAWS:~$ pwb.py login -lang:ml -family:wikipedia

6. Then push the contents of test.txt as given below:

pwb.py pagefromfile -file:/home/paws/test.txt -lang:ml -family:wikipedia -force

Note that -force overwrites pages that already exist; to add content to an existing article instead, pagefromfile also accepts -appendtop and -appendbottom.
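For example, a sketch that appends the file's content to the bottom of the existing pages instead of overwriting them:

pwb.py pagefromfile -file:/home/paws/test.txt -lang:ml -family:wikipedia -appendbottom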

 ----------------------------Part 2-------------------

Adding a category



import pywikibot

def add_category_to_article(page_title, category):
    site = pywikibot.Site('ml', 'wikipedia')  # Malayalam Wikipedia
    page = pywikibot.Page(site, page_title)

    # Check if the page exists
    if not page.exists():
        print(f"Page '{page_title}' does not exist.")
        return

    # Check if the page already has the category
    # (page.categories() yields Category objects, so compare by title)
    existing_categories = [cat.title(with_ns=False) for cat in page.categories()]
    if category in existing_categories:
        print(f"Page '{page_title}' already has the category '{category}'.")
        return

    # Add the category to the page
    page.text += f"\n[[വർഗ്ഗം:{category}]]"
    page.save(f"Adding category '{category}' to the page")

def main():
    articles = [
        "അഡിനോഫൈബ്രോമ",
        "അഡിനോമയോസിസ്",
        "ഒവേറിയൻ_ഫൈബ്രോമ",
        "ഒവേറിയൻ_സിസ്റ്റാഡിനോമ",
        "ഒവേറിയൻ_സീറസ്_സിസ്റ്റാഡിനോമ",
        "ഓപ്സോമെനോറിയ",
        "ഗർഭാശയ_പോളിപ്പ്",
        "ജനനേന്ദ്രിയ_അരിമ്പാറ",
        "ജയന്റ്_കോണ്ടൈലോമാ_അക്യൂമിനേറ്റം",
        "ജേം_സെൽ_ട്യൂമർ",
        "ജോൺ_സ്പിയേഴ്‌സ്",
        "മയോമ",
        "വജൈനൽ_യീസ്റ്റ്_ഇൻഫെക്ഷൻ"
    ]

    category = "രോഗങ്ങൾ"

    for article in articles:
        add_category_to_article(article, category)

if __name__ == "__main__":
    main()

Monday 15 January 2024

Python query for missing Malayalam labels

import requests
import csv

def get_missing_labels(limit=10000):
    # Define the Wikidata endpoint and SPARQL query
    endpoint = "https://query.wikidata.org/sparql"
    query = f"""
    SELECT ?item ?itemLabel
    WHERE {{
      ?item wdt:P31 wd:Q202444;  # Instance of "male given name"
            schema:description ?description.
      FILTER(LANG(?description) = "en")  # English description exists
      FILTER NOT EXISTS {{
        ?item rdfs:label ?malayalamLabel.
        FILTER(LANG(?malayalamLabel) = "ml")  # Malayalam label does not exist
      }}
      # The label service binds ?itemLabel; without it the labels come back empty
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
    }}
    LIMIT {limit}
    """

    # Set headers for the request
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'application/json',
    }

    # Set parameters for the request
    params = {
        'query': query,
        'format': 'json'
    }

    # Make the request to the Wikidata Query Service
    response = requests.get(endpoint, headers=headers, params=params)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        data = response.json().get('results', {}).get('bindings', [])

        # Collect the entity IDs and English labels
        results = []
        for item in data:
            entity_id = item.get('item', {}).get('value', '').split('/')[-1]
            english_label = item.get('itemLabel', {}).get('value', '')
            results.append({'entity_id': entity_id, 'english_label': english_label})

        return results
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

def write_to_csv(data, filename='missing_labels_male_given_name.csv'):
    # Write the results to a CSV file
    with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
        fieldnames = ['entity_id', 'english_label']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

if __name__ == "__main__":
    # Run the function to get items with missing labels
    results = get_missing_labels(limit=10000)

    # Check if there are results
    if results:
        # Print the results
        print("Items of type 'male given name' with an English description but no Malayalam label (limited to 10000 results):")
        for result in results:
            print(f"{result['entity_id']}: {result['english_label']}")

        # Write the results to a CSV file
        write_to_csv(results, filename='missing_labels_male_given_name.csv')

        print("\nResults have been saved to missing_labels_male_given_name.csv.")
    else:
        print("No results obtained.")