PhantomJS

composer.json (the adodb, DOM parser and spreadsheet packages are optional)

  1. {
  2. "require": {
  3. "sunra/php-simple-html-dom-parser": "^1.5",
  4. "adodb/adodb-php": "^5.20",
  5. "jonnyw/php-phantomjs": "4.*",
  6. "phpoffice/phpspreadsheet": "^1.3"
  7. },
  8. "config": {
  9. "bin-dir": "bin"
  10. },
  11. "scripts": {
  12. "post-install-cmd": [
  13. "PhantomInstaller\\Installer::installPhantomJS"
  14. ],
  15. "post-update-cmd": [
  16. "PhantomInstaller\\Installer::installPhantomJS"
  17. ]
  18. }
  19.  
  20. }

 

 

  1. scrape-php-phantomjs.php
  <?php

  /**
   * Renders a page through PhantomJS and collects a title/price pair from
   * every matching list item, then dumps the collected rows.
   *
   * 'URL' and the 'CLASS NAME' values are placeholders: replace them with the
   * target address and the class attributes used by the target markup.
   */

  require 'vendor/autoload.php';

  use JonnyW\PhantomJs\Client;
  use JonnyW\PhantomJs\DependencyInjection\ServiceContainer;

  $client = Client::getInstance();
  $messageFactory = $client->getMessageFactory();
  $request = $messageFactory->createRequest();
  $response = $messageFactory->createResponse();

  $url = 'URL';
  $request->setUrl($url);
  $client->send($request, $response);

  $entries = [];
  $htmlstr = (string) $response->getContent();

  if ($htmlstr === '') {
      // DOMDocument::loadHTML() rejects an empty string (a ValueError on PHP 8,
      // which the "@" operator cannot silence), so report the failed fetch and
      // fall through with an empty result instead of parsing.
      error_log(sprintf('No content received from %s (HTTP status %s)', $url, $response->getStatus()));
  } else {
      // Real-world markup is rarely valid; collect libxml's parse warnings
      // internally rather than hiding every error with "@".
      $previousSetting = libxml_use_internal_errors(true);
      $dom = new DOMDocument();
      $dom->loadHTML($htmlstr);
      libxml_clear_errors();
      libxml_use_internal_errors($previousSetting);

      $xpath = new DOMXPath($dom);
      $q_product = '//li[@class="CLASS NAME"]';

      foreach ($xpath->query($q_product) as $node) {
          // Both expressions are evaluated relative to the current <li>;
          // string() yields '' when the inner element is missing.
          $entries[] = [
              'title' => $xpath->evaluate('string(.//h2[@class="CLASS NAME"]/a)', $node),
              'price' => $xpath->evaluate('string(.//span[@class="CLASS NAME"][1])', $node),
          ];
      }
  }

  var_dump($entries);
  ?>

 

  1. scrape-php-phantomjs-spreadsheet.php
  <?php

  /**
   * For each listed row of example.xlsx, reads a code from column A of the
   * first sheet, renders the matching page through PhantomJS, and writes three
   * scraped values back: columns B and C of the first sheet and column D of
   * the second sheet. The workbook is then saved over the input file.
   *
   * The URL, 'ID NAME', 'CLASS NAME' and XXX values are placeholders to be
   * replaced with the real address, attributes and row/column positions.
   */

  require 'vendor/autoload.php';

  use JonnyW\PhantomJs\Client;
  use JonnyW\PhantomJs\DependencyInjection\ServiceContainer;

  use PhpOffice\PhpSpreadsheet\Writer\Xlsx as Writer;
  use PhpOffice\PhpSpreadsheet\Reader\Xlsx as Reader;

  /**
   * Requests $url through PhantomJS and wraps the returned markup in a DOMXPath.
   *
   * @param Client $client PhantomJS client used to send the request
   * @param string $url    Address to render
   *
   * @return DOMXPath|null Null when the response body is empty
   */
  function fetchXPath(Client $client, $url)
  {
      $messageFactory = $client->getMessageFactory();
      $request = $messageFactory->createRequest();
      $response = $messageFactory->createResponse();
      $request->setUrl($url);
      $client->send($request, $response);

      $htmlstr = (string) $response->getContent();
      if ($htmlstr === '') {
          // DOMDocument::loadHTML() rejects an empty string (a ValueError on
          // PHP 8, which "@" cannot silence), so signal the failure instead.
          return null;
      }

      // Collect libxml's parse warnings internally rather than hiding every
      // error with "@"; restore the caller's setting afterwards.
      $previousSetting = libxml_use_internal_errors(true);
      $dom = new DOMDocument();
      $dom->loadHTML($htmlstr);
      libxml_clear_errors();
      libxml_use_internal_errors($previousSetting);

      return new DOMXPath($dom);
  }

  $reader = new Reader();
  $spreadsheet = $reader->load('example.xlsx');

  // Loop-invariant lookups: resolve the sheets and the client once.
  $sheet0 = $spreadsheet->getSheet(0);
  $sheet1 = $spreadsheet->getSheet(1);
  $client = Client::getInstance();

  $firstRow = 2;
  $lastRow = 3;

  for ($i = $firstRow; $i <= $lastRow; $i++) {
      $code = $sheet0->getCell('A' . $i)->getValue();

      // Encode the cell value so spaces, '&', '#', etc. cannot break the query string.
      $url = 'http://www.example.com?code=' . rawurlencode((string) $code);

      $xpath = fetchXPath($client, $url);
      if ($xpath === null) {
          // Leave this row's cells untouched when nothing could be fetched.
          error_log(sprintf('Row %d: no content received from %s', $i, $url));
          continue;
      }

      // Defaults keep the writes below well-defined when the container is absent.
      $entries = [1 => null, 2 => null, 3 => null];

      $q_1 = '//div[@id="ID NAME"]';
      foreach ($xpath->query($q_1) as $node) {
          // If several nodes match, the last one wins.
          $entries = [
              1 => $xpath->evaluate('string(.//div[@class="CLASS NAME"]/table/tbody/tr[XXX]/td[XXX])', $node),
              2 => $xpath->evaluate('string(.//div[@class="CLASS NAME"]/table/tbody/tr[XXX]/td[XXX])', $node),
              3 => $xpath->evaluate('string(.//table[XXX]/tbody/tr[XXX]/td[XXX])', $node),
          ];
      }

      $sheet0->setCellValue('B' . $i, $entries[1]);
      $sheet0->setCellValue('C' . $i, $entries[2]);
      $sheet1->setCellValue('D' . $i, $entries[3]);
  }

  $writer = new Writer($spreadsheet);
  $writer->save('example.xlsx');
  ?>

Leave a Reply

You must be logged in to post a comment.