Playwright Python Tutorial

Sync Playwright Basics


    from playwright.sync_api import sync_playwright

    # The context manager starts Playwright and stops it again when the block exits,
    # so everything that uses the browser must be indented inside the `with`.
    with sync_playwright() as p:
        # headless=False opens a visible browser window; slow_mo=500 pauses
        # 500 ms between actions so they are easier to follow.
        browser = p.chromium.launch(headless=False, slow_mo=500)
        page = browser.new_page()

    # Launch the browser with a maximized window.
    # NOTE(review): `playwright` is presumably the handle returned by
    # sync_playwright().start(); inside `with sync_playwright() as p:` use `p` instead.

    # "--start-maximized" is passed straight to Chromium as a command-line argument.
    browser = playwright.chromium.launch(headless=False, args=["--start-maximized"])
    # no_viewport=True turns off the fixed default viewport so the page can fill the window.
    context = browser.new_context(no_viewport=True)
    page = context.new_page()

    # Wait for the page to load (two alternatives).

    # Option 1: wait as part of the navigation itself.
    page.goto(url, wait_until='load')

    # Option 2: wait explicitly after a navigation has been triggered.
    page.wait_for_load_state('domcontentloaded')

slow_mo=500 slows down the automation by 500ms between actions for better visibility.

REPL Mode (Interactive Browser Session)

To work with the browser in real-time without it closing automatically (like Jupyter notebook):

# In your terminal type:
python 

# This will open Python REPL where you can write your script, test line by line
# The browser will remain open until you manually close it
# You can then interact with the page directly:

# Example commands in REPL:
page.goto("https://example.com")  # Navigate to new URL
page.click("button")  # Click elements
print(page.title())  # Get page title
# Continue working as long as you need

Note: In the REPL, create the Playwright instance manually with:
playwright= sync_playwright().start()
When you have finished working, shut it down with:
playwright.stop()

Alternative Method: You can also use input() at the end of your script to pause execution:

# At the end of your script:
input("Press Enter to close browser...")
browser.close()

To start the standard Python REPL, run "python" in your terminal.
Pro Tip: For an even better interactive experience, use IPython instead of the regular Python REPL:

pip install ipython
ipython -i your_script.py

IPython provides tab completion and better introspection of objects.

Locating Elements

By Role

link = page.get_by_role('link', name="linkname")
# Matches: <a href='link'>linkname</a>

Current URL

print(page.url)  # Prints current page URL

Highlight Elements

button.highlight()  # Visually highlights the element

Different Locator Types

Print Text of element


element_locator = page.locator("selector_for_your_element")
text = element_locator.text_content()
print(f"Text content: {text}")

By Label

page.get_by_label('EMAIL').highlight()
# Matches input with matching label

By Placeholder

page.get_by_placeholder('EMAIL').highlight()
# Matches input with matching placeholder

By Text

page.get_by_text('Hi this is abdul').highlight()
# Loose match (default)
page.get_by_text('Hi this is abdul', exact=True).highlight()
# Exact match

By Alt Text

page.get_by_alt_text("kid eating apple").highlight()
# Targets image with matching alt text

By Title Attribute

page.get_by_title("attribute").highlight()
# Caution: Same title may be on multiple elements

CSS Selector Locators

page.locator('h1').highlight()  # Tag selector
page.locator('button.btn-outline-success').highlight()  # Class selector
page.locator('button#btn-group-drop1').highlight()  # ID selector

Attribute Selectors

page.locator('input[readonly]').highlight()
page.locator('input[value="correct value"]').highlight()

CSS Hierarchy

# Descendant selector (space): matches a.nav-link.active at any depth inside nav.bg-dark
page.locator('nav.bg-dark a.nav-link.active').highlight()
# Child selector (>): matches ul.list-group only when it is a direct child of div.bas-component
page.locator('div.bas-component > ul.list-group').highlight()

CSS Pseudo Classes

Text Pseudo Class

# Loose match (contains text)
page.locator('h1:text("Navbars")').highlight()

# Exact match
page.locator('h1:text-is("Navbars")').highlight()

Visible Pseudo Class

page.locator('div.dropdown-menu:visible').highlight()
# Targets only visible elements

Nth Match

page.locator(':nth-match(button.btn-primary, 4)')
# Selects 4th matching button
page.locator(':nth-match(button:text("primary"), 1)')
# Selects 1st matching button with text

XPath Locators

# Absolute path (starts from root)
page.locator('xpath=/html/body/h1').highlight()

# Relative path
page.locator('xpath=//h1[@id="idname"]').highlight()
page.locator('//input[@readonly]').highlight()
page.locator('//input[@value="wrong value"]').highlight()

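Note: The 'xpath=' prefix is optional when the selector starts with '//' or '..', because Playwright then detects it as XPath automatically. Other XPath expressions, such as the absolute path above, still need the prefix.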

Other Locators

Nth Element Selection

# Using get_by_role
page.get_by_role('button', name='primary').locator('nth=0').highlight()

# Using locator
page.locator('button').locator('nth=0').highlight()

Mouse Actions

# .first narrows the locator to the first matching button
targeted_element = page.get_by_role('button', name='primary').first

# Double click
targeted_element.dblclick()
targeted_element.dblclick(delay=500)  # Slower for visibility (delay is in milliseconds)

# Right click
targeted_element.click(button='right')

# Click with modifier keys
# Playwright accepts 'Alt', 'Control', 'Meta' and 'Shift' here; the
# abbreviation 'Ctrl' is not a valid modifier name.
targeted_element.click(modifiers=['Shift'])
targeted_element.click(modifiers=['Shift', 'Control', 'Meta'])

# Hover
outline_button = page.locator('button.btn-outline-primary')
outline_button.hover()

Input Field Actions

# Keep the locator itself: highlight() returns None, so chaining it into the
# assignment would leave nothing to call fill()/type() on. The variable is not
# called `input` so that the built-in input() stays usable.
email_input = page.get_by_placeholder('EMAIL')
email_input.highlight()

# Fill (fast, like paste)
email_input.fill('abdul@gmail.com')

# Type (simulates typing)
# NOTE(review): newer Playwright releases deprecate Locator.type() in favour of
# press_sequentially() - confirm against the installed version.
email_input.type('abdul@gmail.com')
email_input.type('abdul@gmail.com', delay=200)  # With delay (milliseconds between key presses)

# Clear
email_input.clear()

# Get input value
input_value = email_input.input_value()

Checkbox and Radio Inputs

# Radio button
radio_option2 = page.get_by_label('Option 2')
radio_option2.check()
radio_option2.is_checked()  # Returns boolean

# Checkbox
check_option2 = page.get_by_label('Option 2')
check_option2.check()
check_option2.uncheck()
check_option2.set_checked(True)  # or False
check_option2.click()  # Toggles state

# Switch field
switch_field = page.get_by_label('Toggle Switch')
switch_field.check()
switch_field.uncheck()

Select Menu

# Single select
select_option = page.get_by_label('Example select')
select_option.select_option('value')  # Value from dropdown

# Multi select
multi_select = page.get_by_label('Example Multi select')
multi_select.select_option(['2', '4'])  # Multiple values
multi_select.select_option(['1', '2', '5'])

Warning: Using values that don't exist in the dropdown will cause a timeout error.

Dropdown Menu

# Click dropdown button
button = page.locator('button#btnGroupDropdown1')
button.click()

# Select from visible dropdown
dropdown_link = page.locator('div.dropdown-menu:visible a:text("Dropdown link")').last
dropdown_link.click()

File Uploads

Standard Input

file_input = page.get_by_label('Default file input example')

# Single file
file_input.set_input_files("file.txt")

# Multiple files
file_input.set_input_files(["file.txt", "app.py"])

Button-triggered Upload

with page.expect_file_chooser() as fc_info:
    file_input.click()
    
file_chooser = fc_info.value
file_chooser.set_files('app.py')

Tip: If the highlight overlay gets in the way, highlight an unobtrusive element such as the footer instead:

page.locator('footer').highlight()

Async Playwright Basics

import asyncio
from playwright.async_api import async_playwright

async def main():
    """Open example.com in a visible Chromium window and print its title."""
    async with async_playwright() as playwright:
        chromium = playwright.chromium
        browser = await chromium.launch(headless=False)
        tab = await browser.new_page()
        await tab.goto("https://example.com")
        page_title = await tab.title()
        print(page_title)
        # Close explicitly before the Playwright context itself shuts down.
        await browser.close()

asyncio.run(main())

When to Use Async Playwright

✅ Use Async When:

Scraping multiple pages simultaneously
Running performance tests with many virtual users
Handling multiple browser sessions
Executing long-running tasks without blocking
Need better resource utilization

❌ Avoid Async When:

Writing simple, linear scripts
Working with small-scale automation
Debugging (sync is easier to debug)
When dependencies don't support async

Sync vs Async Comparison

Feature	Sync Playwright	Async Playwright
Code Structure	Linear, easy to follow	Requires async/await
Performance	One operation at a time	Concurrent operations
Browser Control	Single browser session	Multiple sessions
Learning Curve	Easier for beginners	Requires async knowledge
Best For	Simple scripts, testing	Large-scale automation

Concurrent Page Handling

import asyncio
from playwright.async_api import async_playwright

async def scrape_page(url):
    """Load ``url`` in a freshly launched Chromium and return the page title.

    Each call starts and closes its own browser, so calls are independent
    of one another.
    """
    async with async_playwright() as playwright:
        chromium = await playwright.chromium.launch()
        tab = await chromium.new_page()
        await tab.goto(url)
        page_title = await tab.title()
        await chromium.close()
        return page_title

async def main():
    """Scrape three sample sites concurrently and print the list of titles."""
    urls = [
        'https://example.com',
        'https://google.com',
        'https://github.com'
    ]

    # One scrape_page coroutine per URL; gather() runs them concurrently
    # and returns the results in the same order as the inputs.
    pending = map(scrape_page, urls)
    results = await asyncio.gather(*pending)
    print(results)

asyncio.run(main())

Async Best Practices

1. Use Semaphores for Rate Limiting

async def limited_task(semaphore, url):
    """Scrape ``url`` while holding a slot of ``semaphore``.

    The slot is released as soon as scrape_page() finishes (or raises).
    """
    async with semaphore:
        result = await scrape_page(url)
    return result

async def main():
    """Scrape a list of URLs with at most five scrapes in flight at once.

    Returns:
        The results of limited_task() for each URL, in the same order as ``urls``.
    """
    # Defined locally so the example is self-contained; replace with your own list.
    urls = [
        'https://example.com',
        'https://google.com',
        'https://github.com'
    ]
    # A single semaphore shared by every task is what enforces the limit.
    semaphore = asyncio.Semaphore(5)  # Max 5 concurrent
    tasks = [limited_task(semaphore, url) for url in urls]
    # Return the gathered results instead of discarding them.
    return await asyncio.gather(*tasks)

2. Always Close Resources

Use async with or manually await browser.close() to prevent resource leaks.

3. Error Handling

Always wrap async operations in try-except blocks: an exception raised inside a task that is never awaited can go unnoticed.

Advanced Async Patterns

Timeout Handling

try:
    await page.goto('https://example.com', timeout=10000)
except Exception as e:
    print(f"Timeout occurred: {e}")

Parallel Different Tasks

async def multi_task():
    """Start a navigation and a selector wait together and await both.

    Uses a ``page`` object that must already exist in the enclosing scope.
    """
    # Both coroutines are created first, without awaiting, so gather()
    # can run them concurrently.
    pending = (
        page.goto('https://example.com'),
        page.wait_for_selector('h1'),
    )
    await asyncio.gather(*pending)