PhantomJS
composer.json (the adodb, DOM parser, and spreadsheet packages are optional)
- {
- "require": {
- "sunra/php-simple-html-dom-parser": "^1.5",
- "adodb/adodb-php": "^5.20",
- "jonnyw/php-phantomjs": "4.*",
- "phpoffice/phpspreadsheet": "^1.3"
- },
- "config": {
- "bin-dir": "bin"
- },
- "scripts": {
- "post-install-cmd": [
- "PhantomInstaller\\Installer::installPhantomJS"
- ],
- "post-update-cmd": [
- "PhantomInstaller\\Installer::installPhantomJS"
- ]
- }
-
- }
- scrape-php-phantomjs.php
- <?php
-
- // Fetches a single page through PhantomJS (so JavaScript-rendered markup is
- // included) and extracts title/price pairs from it with XPath.
- // Result: $entries is a list of ['title' => string, 'price' => string] rows.
- // 'URL' and every "CLASS NAME" below are placeholders to fill in.
-
- require 'vendor/autoload.php';
-
- use JonnyW\PhantomJs\Client;
- use JonnyW\PhantomJs\DependencyInjection\ServiceContainer;
-
- $client = Client::getInstance();
- $request = $client->getMessageFactory()->createRequest();
- $response = $client->getMessageFactory()->createResponse();
-
- $url = 'URL';
- $request->setUrl($url);
- $client->send($request, $response);
-
- // Normalise a missing body to '' so the emptiness check below is reliable.
- $htmlstr = (string) $response->getContent();
-
- $dom = new DOMDocument;
- // Collect libxml's complaints about real-world markup internally instead of
- // hiding them with "@"; the previous setting is restored right after parsing.
- $previousLibxmlSetting = libxml_use_internal_errors(true);
- // DOMDocument::loadHTML() throws a ValueError on an empty string (PHP 8+),
- // which "@" cannot suppress. An unloaded document simply yields no matches.
- if ($htmlstr !== '') {
-     $dom->loadHTML($htmlstr);
- }
- libxml_clear_errors();
- libxml_use_internal_errors($previousLibxmlSetting);
-
- $xpath = new DOMXPath($dom);
- $entries = [];
- // @class="..." compares the whole attribute value, not a single class token.
- $q_product = '//li[@class="CLASS NAME"]';
- foreach ($xpath->query($q_product) as $node) {
-     // string(...) evaluates to '' when the relative path matches nothing.
-     $entries[] = [
-         'title' => $xpath->evaluate('string(.//h2[@class="CLASS NAME"]/a)', $node),
-         'price' => $xpath->evaluate('string(.//span[@class="CLASS NAME"][1])', $node),
-     ];
- }
- ?>
- scrape-php-phantomjs-spreadsheet.php
- <?php
-
- // Reads a code from column A (rows 2-3) of the first worksheet in
- // example.xlsx, fetches the matching page through PhantomJS, extracts three
- // values with XPath and writes them back: values 1 and 2 to columns B and C
- // of the first worksheet, value 3 to column D of the second worksheet.
- // The workbook is saved over example.xlsx once all rows are processed.
- // "ID NAME", "CLASS NAME" and XXX in the XPath strings are placeholders.
-
- require 'vendor/autoload.php';
-
- use JonnyW\PhantomJs\Client;
- use JonnyW\PhantomJs\DependencyInjection\ServiceContainer;
-
- use PhpOffice\PhpSpreadsheet\Writer\Xlsx as Writer;
- use PhpOffice\PhpSpreadsheet\Reader\Xlsx as Reader;
-
- $reader = new Reader();
- $spreadsheet = $reader->load('example.xlsx');
-
- // Loop-invariant lookups are done once instead of on every row.
- $sheet0 = $spreadsheet->getSheet(0);
- $sheet1 = $spreadsheet->getSheet(1);
- $client = Client::getInstance();
- $messageFactory = $client->getMessageFactory();
-
- // Collect libxml's complaints about real-world markup internally instead of
- // hiding them with "@"; the previous setting is restored after the loop.
- $previousLibxmlSetting = libxml_use_internal_errors(true);
-
- for ($i = 2; $i <= 3; $i++) {
-     $code = $sheet0->getCell('A' . $i)->getValue();
-     // Encode the cell value so spaces, '&', '#', etc. cannot break the query string.
-     $url = 'http://www.example.com?code=' . rawurlencode((string) $code);
-
-     $request = $messageFactory->createRequest();
-     $response = $messageFactory->createResponse();
-     $request->setUrl($url);
-     $client->send($request, $response);
-
-     // Normalise a missing body to '' so the emptiness check below is reliable.
-     $htmlstr = (string) $response->getContent();
-     $dom = new DOMDocument;
-     // DOMDocument::loadHTML() throws a ValueError on an empty string (PHP 8+),
-     // which "@" cannot suppress. An unloaded document simply yields no matches.
-     if ($htmlstr !== '') {
-         $dom->loadHTML($htmlstr);
-     }
-     libxml_clear_errors();
-     $xpath = new DOMXPath($dom);
-
-     // Defaults cover the case where the container is not found: the cells
-     // then receive null without "Undefined array key" warnings.
-     // PHP stores the numeric-string keys as integers 1..3.
-     $entries = ['1' => null, '2' => null, '3' => null];
-     $q_1 = '//div[@id="ID NAME"]';
-     foreach ($xpath->query($q_1) as $node) {
-         // Reassigned on every match, so the last matching node wins.
-         $entries = [
-             '1' => $xpath->evaluate('string(.//div[@class="CLASS NAME"]/table/tbody/tr[XXX]/td[XXX])', $node),
-             '2' => $xpath->evaluate('string(.//div[@class="CLASS NAME"]/table/tbody/tr[XXX]/td[XXX])', $node),
-             '3' => $xpath->evaluate('string(.//table[XXX]/tbody/tr[XXX]/td[XXX])', $node),
-         ];
-     }
-
-     $sheet0->setCellValue('B' . $i, $entries[1]);
-     $sheet0->setCellValue('C' . $i, $entries[2]);
-     $sheet1->setCellValue('D' . $i, $entries[3]);
- }
-
- libxml_use_internal_errors($previousLibxmlSetting);
-
- // Overwrites the input workbook with the enriched data.
- $writer = new Writer($spreadsheet);
- $writer->save('example.xlsx');
- ?>