import asyncio
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext, PlaywrightPreNavCrawlingContext
from crawlee.sessions import SessionPool, SessionCookies
from crawlee.browsers import BrowserPool
from crawlee.browsers._playwright_browser_plugin import PlaywrightBrowserPlugin



async def main() -> None:
    """Crawl a single page with a cookie pre-loaded into the browser context.

    The cookie is handed to Playwright through the ``storage_state`` option of
    the new-context options, the browser plugin is wrapped in a ``BrowserPool``
    and a ``PlaywrightCrawler`` limited to a single session is run against one
    start URL. The page URL and title are pushed to the default dataset.
    """
    # Placeholder cookie. Real cookies tend to carry information about the user,
    # so they are not shared here; replace this dictionary with cookies that
    # belong to the website being crawled.
    dummy_cookie = {
        'name': 'abcd',
        'value': 'xyz',
        'domain': '.youtube.com',
        'path': '/',
        # -1 marks a session cookie. The previous value of 1 is Unix time 1 s
        # (1970), i.e. an already-expired cookie the browser would discard.
        'expires': -1,
        'httpOnly': True,
    }

    # Playwright's storage state uses the plural key 'cookies' holding a LIST of
    # cookie dicts (plus an 'origins' list for localStorage). The earlier
    # {'cookie': {...}} shape does not match that schema, which is why the
    # cookie never reached the page.
    # NOTE(review): some crawlee versions may create a persistent browser
    # context for non-incognito pages, which might not accept 'storage_state'
    # -- confirm against the installed version.
    browser_plugin = PlaywrightBrowserPlugin(
        browser_new_context_options={
            'storage_state': {'cookies': [dummy_cookie], 'origins': []},
        },
    )
    # Hand the configured plugin to the browser pool used by the crawler.
    browser_pool = BrowserPool(plugins=[browser_plugin])

    # Restrict the crawler to one session without rotation so every request
    # goes through the same session.
    crawler = PlaywrightCrawler(
        use_session_pool=True,
        max_session_rotations=0,
        browser_pool=browser_pool,
        session_pool=SessionPool(max_pool_size=1),
    )

    # The handler must be registered BEFORE crawler.run(); registering it after
    # the run has finished means the crawl executes without a default handler.
    @crawler.router.default_handler
    async def request_handler(context: PlaywrightCrawlingContext) -> None:
        """Log the visited URL and store the page URL and title."""
        context.log.info(f'Processing {context.request.url}...')
        page = context.page
        data = {
            'url': context.request.url,
            'title': await page.title(),
        }
        await context.push_data(data=data)

    # Dummy start URL; replace it (and the cookie above) with the real target
    # when actually testing this.
    await crawler.run(['https://www.youtube.com/'])


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())