sars-scanner/parse-rki.php

53 lines
2.0 KiB
PHP
Executable File

#! /usr/bin/php
<?php
/*
 * Parses every stored scan with site=1 and parsed=0: each HTML table row of
 * the scan's content becomes one row in `data` (scan id, first cell, second
 * cell), after which the scan is flagged parsed=1.
 *
 * Expects config.php to define $pdo (a PDO connection).
 */
include "config.php";

// Scraped HTML is rarely valid in libxml's eyes; collect parser complaints
// internally instead of emitting one PHP warning per problem.
$previousLibxmlSetting = libxml_use_internal_errors(true);

$scans = $pdo
    ->query("SELECT * FROM scans where site=1 and parsed=0")
    ->fetchAll(PDO::FETCH_ASSOC);

// Both statements are prepared once and re-executed for every row / scan.
$insertStmt = $pdo->prepare("INSERT INTO `data`(`scan`, `place`, `amount`) VALUES (?, ?, ?)");
$markParsedStmt = $pdo->prepare("UPDATE `scans` SET `parsed`=1 WHERE id=?");

// SQLSTATEs that are not reported: success, integrity-constraint violation
// and 22007. NOTE(review): presumably these cover duplicate rows and header
// rows whose "amount" is not numeric -- confirm against the `data` schema.
$toleratedStates = ['00000', '23000', '22007'];

foreach ($scans as $scan) {
    $scanid = $scan['id'];
    $html = (string) $scan['content'];
    $document = [];

    // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8,
    // warning before that). An empty scan has no tables, so it is only
    // flagged as parsed below.
    if ($html !== '') {
        $page = new DOMDocument();
        $page->loadHTML($html);
        libxml_clear_errors();

        foreach ($page->getElementsByTagName("table") as $tableIndex => $table) {
            // Direct children of <table> are treated as row containers
            // (thead/tbody); their children are treated as rows.
            foreach ($table->childNodes as $section) {
                if (!$section->hasChildNodes()) {
                    continue; // e.g. a text node between sections
                }
                foreach ($section->childNodes as $row) {
                    if (!$row->hasChildNodes()) {
                        continue;
                    }
                    $zeile = [];
                    foreach ($row->childNodes as $cellIndex => $cell) {
                        $text = (string) $cell->textContent;
                        if ($text === '') {
                            continue;
                        }
                        // Every "." is stripped (presumably German thousands
                        // separators -- TODO confirm).
                        $value = str_replace(".", "", $text);
                        // The first child of each row in the first table gets
                        // a "D-" prefix.
                        $zeile[] = ($tableIndex === 0 && $cellIndex === 0)
                            ? 'D-' . $value
                            : $value;
                    }
                    // A row needs both a place and an amount; anything shorter
                    // would only produce an insert with NULL values.
                    if (count($zeile) >= 2) {
                        $document[] = $zeile;
                    }
                }
            }
        }
    }

    foreach ($document as $entry) {
        try {
            $insertStmt->execute([$scanid, $entry[0], $entry[1]]);
            $errorInfo = $insertStmt->errorInfo();
        } catch (PDOException $e) {
            // Reached only when PDO runs in exception mode; apply the same
            // SQLSTATE filter instead of aborting the whole batch.
            $errorInfo = is_array($e->errorInfo)
                ? $e->errorInfo
                : [(string) $e->getCode(), null, $e->getMessage()];
        }
        if (!in_array($errorInfo[0], $toleratedStates, true)) {
            var_dump($errorInfo);
        }
    }

    $markParsedStmt->execute([$scanid]);
}

libxml_use_internal_errors($previousLibxmlSetting);