Monday 15 January 2024

Python Query for Description

import csv

import requests


def get_missing_labels(limit=10000):
    """Query Wikidata for given-name items that lack a Malayalam label.

    Selects items that are an instance of ``wd:Q202444``, have an English
    description and have no Malayalam (``ml``) label, and returns their
    entity IDs together with their English labels.

    Args:
        limit: Maximum number of rows to request. Any value ``int()``
            accepts is allowed; ``0`` yields an empty list without
            contacting the endpoint.

    Returns:
        A list of ``{'entity_id': ..., 'english_label': ...}`` dicts
        (``english_label`` is ``''`` when the item has no English label),
        or ``None`` when the limit is invalid, the request fails, the
        endpoint answers with a non-200 status, or the body is not JSON.
        A message is printed in every failure case.
    """
    # Coerce the limit before it is interpolated into the query so that
    # arbitrary text can never end up inside the SPARQL string.
    try:
        limit = int(limit)
    except (TypeError, ValueError):
        print(f"Failed to retrieve data. Invalid limit: {limit!r}")
        return None
    if limit < 0:
        print(f"Failed to retrieve data. Invalid limit: {limit!r}")
        return None
    if limit == 0:
        # LIMIT 0 can only ever produce an empty result set.
        return []

    endpoint = "https://query.wikidata.org/sparql"

    # NOTE(review): wd:Q202444 appears to be Wikidata's generic "given name"
    # class, while the query comment, the output text and the CSV filename
    # all say "male given name" (wd:Q12308941?) - confirm which is intended.
    query = f"""
    SELECT ?item ?itemLabel
    WHERE {{
      ?item wdt:P31 wd:Q202444;  # Instance of "male given name"
            schema:description ?description.
      FILTER(LANG(?description) = "en")  # English description
      FILTER NOT EXISTS {{
        ?item rdfs:label ?malayalamLabel.
        FILTER(LANG(?malayalamLabel) = "ml")  # Malayalam label does not exist
      }}
      # Bind the English label; OPTIONAL keeps items that have none.
      OPTIONAL {{
        ?item rdfs:label ?itemLabel.
        FILTER(LANG(?itemLabel) = "en")
      }}
    }}
    LIMIT {limit}
    """

    # NOTE(review): this is a generic browser User-Agent. Wikimedia's
    # User-Agent policy asks scripts to send a descriptive one with contact
    # details - consider replacing it.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'application/json',
        'Content-Type': 'application/json',
    }
    params = {
        'query': query,
        'format': 'json',
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    try:
        response = requests.get(endpoint, headers=headers, params=params, timeout=90)
    except requests.RequestException as exc:
        print(f"Failed to retrieve data. Request error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

    try:
        payload = response.json()
    except ValueError:
        print("Failed to retrieve data. Response was not valid JSON.")
        return None

    bindings = payload.get('results', {}).get('bindings', [])
    return [
        {
            # The item value is the entity URI; its last path segment is the Q-id.
            'entity_id': row.get('item', {}).get('value', '').split('/')[-1],
            'english_label': row.get('itemLabel', {}).get('value', ''),
        }
        for row in bindings
    ]


def write_to_csv(data, filename='missing_labels_male_given_name.csv'):
    """Save label records to a UTF-8 CSV file.

    Args:
        data: Iterable of dicts carrying the keys ``entity_id`` and
            ``english_label``; each dict becomes one row.
        filename: Path of the CSV file to create or overwrite.

    The file starts with a header row naming the two columns.
    """
    columns = ['entity_id', 'english_label']

    # newline='' leaves line-ending handling to the csv module.
    with open(filename, 'w', newline='', encoding='utf-8') as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()
        for record in data:
            sheet.writerow(record)


if __name__ == "__main__":
    # Fetch the items first; everything below depends on that result.
    results = get_missing_labels(limit=10000)

    if not results:
        # Covers both a failed request (None) and an empty result list.
        print("No results obtained.")
    else:
        # Echo every match to the console as "<entity id>: <English label>".
        print("Items with English description 'male given name' lacking Malayalam label (limited to 10000 results):")
        for entry in results:
            print(entry['entity_id'], entry['english_label'], sep=': ')

        # Persist the same rows to disk.
        write_to_csv(results, filename='missing_labels_male_given_name.csv')

        print("\nResults have been saved to missing_labels_male_given_name.csv.")




0 comments:

Post a Comment