Разбор большого (100 МБ) JSON с PHP с использованием salsify JsonStreamerParser в PHPProBid

У меня есть файл JSON:

curl https://api.mercadolibre.com/sites/MLM/categories/all  > categoriesMLM.gz

который содержит объект объектов (около 60000+). Я также установил https://github.com/salsify/jsonstreamingparser через composer, чтобы сохранить каждый элемент в базе данных в PHPProBid, как написано в следующем коде:

public function SaveCategoryByStream(){
require_once dirname(__FILE__).'/../../../../../../vendor/autoload.php';
ini_set('memory_limit', '4024M');
ini_set('max_execution_time', 0);
$testfile = '/home/richi/Desktop/categoriesMLM.json';

$listener = new \JsonStreamingParser\Listener\GeoJsonListener(function ($category) {

$category_name = Array(
$category['name']
);
$category['id'] = preg_replace('/[^0-9]/', '', $category['id']);
$id = $order_id = Array(
$category['id']
);
$parent_id = $category['path_from_root'];
if($parent_id == null){

}else{
$parent_id = end($category['path_from_root']);
$parent_id = prev($category['path_from_root']);
$parent_id = preg_replace('/[^0-9]/', '', $parent_id['id']);
}
$new_category = Array(
'parent_id' => $parent_id,
'id' => $id,
'order_id' => $order_id,
'name' => $category_name,
'full_name' => $category_name
);
try {
$categoriesService = new Service\Table\Relational\Categories();
$categoriesService->save($new_category);
$parent_id = $id;//['id'];
//Saving all children categories of that specific category
for($ii = 0; $ii < count($category['children_categories']); $ii++){
$this->SaveChildrenCategory($parent_id, $category['children_categories'][$ii], $categoriesService);
}
}catch(Exception $e){
echo $e;
}
});
$stream = fopen($testfile, 'r');
try {
$parser = new \JsonStreamingParser\Parser($stream, $listener);
$parser->parse();
fclose($stream);
} catch (Exception $e) {
fclose($stream);
throw $e;
}

$controller = 'Mercadolibre';
$headline = $this->_('MeliSync');
$filter = 'first_time';
return array(
'controller' => $controller,
'headline'   => $headline,
'messages'   => $this->_flashMessenger->getMessages(),
'filter'     => $filter
);

}

После распаковки файл JSON выглядит примерно так:

{
"MLM5320": {
"id": "MLM5320",
"name": "Accesorios para Autos",
"picture": "http://resources.mlstatic.com/category/images/a076507d-2b2a-43a8-b08a-86a2197c3a82.png",
"permalink": null,
"total_items_in_this_category": 0,
"path_from_root": [
{
"id": "MLM1747",
"name": "Accesorios para Vehículos"},
{
"id": "MLM5320",
"name": "Accesorios para Autos"}
],
"children_categories": [
{
"id": "MLM92132",
"name": "Accesorios para el Exterior",
"total_items_in_this_category": 119421
},
{
"id": "MLM92130",
"name": "Accesorios para el Interior",
"total_items_in_this_category": 152424
},
{
"id": "MLM92140",
"name": "Otros",
"total_items_in_this_category": 0
}
],
"attribute_types": "attributes",
"settings": {
"adult_content": false,
"buying_allowed": true,
"buying_modes": [
"buy_it_now",
"auction"],
"catalog_domain": null,
"coverage_areas": "not_allowed",
"currencies": [
"USD",
"MXN"],
"fragile": false,
"immediate_payment": "required",
"item_conditions": [
"not_specified",
"new",
"used"],
"items_reviews_allowed": false,
"listing_allowed": false,
"max_description_length": 50000,
"max_pictures_per_item": 12,
"max_pictures_per_item_var": 10,
"max_sub_title_length": 70,
"max_title_length": 60,
"maximum_price": null,
"minimum_price": null,
"mirror_category": null,
"mirror_master_category": null,
"mirror_slave_categories": [
],
"price": "required",
"reservation_allowed": "not_allowed",
"restrictions": [
],
"rounded_address": false,
"seller_contact": "not_allowed",
"shipping_modes": [
"me1",
"not_specified",
"custom"],
"shipping_options": [
"carrier",
"custom"],
"shipping_profile": "optional",
"show_contact_information": false,
"simple_shipping": "optional",
"stock": "required",
"sub_vertical": null,
"subscribable": false,
"tags": [
],
"vertical": null,
"vip_subdomain": "articulo"},
"meta_categ_id": null,
"attributable": false
},"MLM187169": {
"id": "MLM187169",
"name": "Alimentos para Bebés",
"picture": null,
"permalink": null,
"total_items_in_this_category": 1752,
"path_from_root": [
{
"id": "MLM1403",
"name": "Alimentos y Bebidas"},
{
"id": "MLM187169",
"name": "Alimentos para Bebés"}
],
"children_categories": [
{
"id": "MLM189061",
"name": "Leche",
"total_items_in_this_category": 934
},
{
"id": "MLM189062",
"name": "Papilla",
"total_items_in_this_category": 193
},
{
"id": "MLM189060",
"name": "Otros",
"total_items_in_this_category": 619
}
],
"attribute_types": "attributes",
"settings": {
"adult_content": false,
"buying_allowed": true,
"buying_modes": [
"auction",
"buy_it_now"],
"catalog_domain": null,
"coverage_areas": "not_allowed",
"currencies": [
"USD",
"MXN"],
"fragile": false,
"immediate_payment": "required",
"item_conditions": [
"used",
"not_specified",
"new"],
"items_reviews_allowed": false,
"listing_allowed": false,
"max_description_length": 50000,
"max_pictures_per_item": 12,
"max_pictures_per_item_var": 10,
"max_sub_title_length": 70,
"max_title_length": 60,
"maximum_price": null,
"minimum_price": null,
"mirror_category": null,
"mirror_master_category": "MLM39965",
"mirror_slave_categories": [
],
"price": "required",
"reservation_allowed": "not_allowed",
"restrictions": [
],
"rounded_address": false,
"seller_contact": "not_allowed",
"shipping_modes": [
"custom",
"not_specified",
"me2",
"me1"],
"shipping_options": [
"carrier",
"custom"],
"shipping_profile": "optional",
"show_contact_information": false,
"simple_shipping": "optional",
"stock": "required",
"sub_vertical": null,
"subscribable": true,
"tags": [
],
"vertical": null,
"vip_subdomain": "articulo"},
"meta_categ_id": null,
"attributable": false
},
"MLM1403": {
"id": "MLM1403",
"name": "Alimentos y Bebidas",
"picture": "http://resources.mlstatic.com/category/images/57ad1885-5f74-4ac0-b54c-d0b4c575a371.png",
"permalink": "http://home.mercadolibre.com.mx/alimentos-bebidas/",
"total_items_in_this_category": 72668,
"path_from_root": [
{
"id": "MLM1403",
"name": "Alimentos y Bebidas"}
],
"children_categories": [
{
"id": "MLM187169",
"name": "Alimentos para Bebés",
"total_items_in_this_category": 1584
},
{
"id": "MLM189607",
"name": "Bebidas con Alcohol",
"total_items_in_this_category": 4593
},
{
"id": "MLM178700",
"name": "Bebidas sin Alcohol",
"total_items_in_this_category": 12501
},
{
"id": "MLM1423",
"name": "Comestibles",
"total_items_in_this_category": 37780
},
{
"id": "MLM187587",
"name": "Kit de Preparación de Cerveza",
"total_items_in_this_category": 300
},
{
"id": "MLM191759",
"name": "Lúpulo",
"total_items_in_this_category": 0
},
{
"id": "MLM189225",
"name": "Yerba Mate",
"total_items_in_this_category": 195
},
{
"id": "MLM1417",
"name": "Otros",
"total_items_in_this_category": 15146
}
],
"attribute_types": "attributes",
"settings": {
"adult_content": false,
"buying_allowed": true,
"buying_modes": [
"buy_it_now",
"auction"],
"catalog_domain": null,
"coverage_areas": "not_allowed",
"currencies": [
"USD",
"MXN"],
"fragile": false,
"immediate_payment": "required",
"item_conditions": [
"used",
"not_specified",
"new"],
"items_reviews_allowed": false,
"listing_allowed": false,
"max_description_length": 50000,
"max_pictures_per_item": 12,
"max_pictures_per_item_var": 10,
"max_sub_title_length": 70,
"max_title_length": 60,
"maximum_price": null,
"minimum_price": null,
"mirror_category": null,
"mirror_master_category": null,
"mirror_slave_categories": [
],
"price": "required",
"reservation_allowed": "not_allowed",
"restrictions": [
],
"rounded_address": false,
"seller_contact": "not_allowed",
"shipping_modes": [
"me2",
"not_specified",
"me1",
"custom"],
"shipping_options": [
"carrier",
"custom"],
"shipping_profile": "optional",
"show_contact_information": false,
"simple_shipping": "optional",
"stock": "required",
"sub_vertical": null,
"subscribable": false,
"tags": [
],
"vertical": null,
"vip_subdomain": "articulo"},
"meta_categ_id": null,
"attributable": false
},
"MLM92150": {
"id": "MLM92150",
"name": "Accesorios para Cuatrimotos",
"picture": "http://resources.mlstatic.com/category/images/61189af5-4e70-45b6-89fa-95339ccc2166.png",
"permalink": null,
"total_items_in_this_category": 2360,
"path_from_root": [
{
"id": "MLM1747",
"name": "Accesorios para Vehículos"},
{
"id": "MLM92150",
"name": "Accesorios para Cuatrimotos"}
],
"children_categories": [
{
"id": "MLM165695",
"name": "Caja",
"total_items_in_this_category": 136
},
{
"id": "MLM189905",
"name": "Fundas",
"total_items_in_this_category": 52
},
{
"id": "MLM165696",
"name": "Rampas",
"total_items_in_this_category": 69
},
{
"id": "MLM165694",
"name": "Ropa",
"total_items_in_this_category": 165
},
{
"id": "MLM165697",
"name": "Winch",
"total_items_in_this_category": 451
},
{
"id": "MLM165698",
"name": "Otros",
"total_items_in_this_category": 1486
}
],
"attribute_types": "attributes",
"settings": {
"adult_content": false,
"buying_allowed": true,
"buying_modes": [
"buy_it_now",
"auction"],
"catalog_domain": null,
"coverage_areas": "not_allowed",
"currencies": [
"MXN",
"USD"],
"fragile": false,
"immediate_payment": "required",
"item_conditions": [
"not_specified",
"used",
"new"],
"items_reviews_allowed": false,
"listing_allowed": false,
"max_description_length": 50000,
"max_pictures_per_item": 12,
"max_pictures_per_item_var": 10,
"max_sub_title_length": 70,
"max_title_length": 60,
"maximum_price": null,
"minimum_price": null,
"mirror_category": null,
"mirror_master_category": null,
"mirror_slave_categories": [
],
"price": "required",
"reservation_allowed": "not_allowed",
"restrictions": [
],
"rounded_address": false,
"seller_contact": "not_allowed",
"shipping_modes": [
"custom",
"me1",
"not_specified"],
"shipping_options": [
"carrier",
"custom"],
"shipping_profile": "optional",
"show_contact_information": false,
"simple_shipping": "optional",
"stock": "required",
"sub_vertical": null,
"subscribable": false,
"tags": [
],
"vertical": null,
"vip_subdomain": "articulo"},
"meta_categ_id": null,
"attributable": false
},
"MLM189061": {
"id": "MLM189061",
"name": "Leche",
"picture": null,
"permalink": null,
"total_items_in_this_category": 114,
"path_from_root": [
{
"id": "MLM1403",
"name": "Alimentos y Bebidas"},
{
"id": "MLM187169",
"name": "Alimentos para Bebés"},
{
"id": "MLM189061",
"name": "Leche"}
],
"children_categories": [
],
"attribute_types": "attributes",
"settings": {
"adult_content": false,
"buying_allowed": true,
"buying_modes": [
"auction",
"buy_it_now"],
"catalog_domain": "MLM-BABIES_FORMULA",
"coverage_areas": "not_allowed",
"currencies": [
"USD",
"MXN"],
"fragile": false,
"immediate_payment": "required",
"item_conditions": [
"used",
"not_specified",
"new"],
"items_reviews_allowed": false,
"listing_allowed": true,
"max_description_length": 50000,
"max_pictures_per_item": 12,
"max_pictures_per_item_var": 10,
"max_sub_title_length": 70,
"max_title_length": 60,
"maximum_price": null,
"minimum_price": null,
"mirror_category": "MLM189058",
"mirror_master_category": "MLM189058",
"mirror_slave_categories": [
],
"price": "required",
"reservation_allowed": "not_allowed",
"restrictions": [
],
"rounded_address": false,
"seller_contact": "not_allowed",
"shipping_modes": [
"custom",
"not_specified",
"me2",
"me1"],
"shipping_options": [
"carrier",
"custom"],
"shipping_profile": "optional",
"show_contact_information": false,
"simple_shipping": "optional",
"stock": "required",
"sub_vertical": null,
"subscribable": true,
"tags": [
],
"vertical": null,
"vip_subdomain": "articulo"},
"meta_categ_id": null,
"attributable": false
},
"MLM1071": {
"id": "MLM1071",
"name": "Animales y Mascotas",
"picture": "http://resources.mlstatic.com/category/images/0b18438b-f56c-421a-a020-f581f40f4c24.png",
"permalink": "http://home.mercadolibre.com.mx/animales/",
"total_items_in_this_category": 0,
"path_from_root": [
{
"id": "MLM1071",
"name": "Animales y Mascotas"}
],
"children_categories": [
{
"id": "MLM1100",
"name": "Aves",
"total_items_in_this_category": 0
},
{
"id": "MLM189310",
"name": "Conejos",
"total_items_in_this_category": 4006
},
{
"id": "MLM1117",
"name": "Equinos",
"total_items_in_this_category": 4128
},
{
"id": "MLM1081",
"name": "Gatos",
"total_items_in_this_category": 35580
},
{
"id": "MLM3615",
"name": "Libros de animales",
"total_items_in_this_category": 10917
},
{
"id": "MLM1091",
"name": "Peces",
"total_items_in_this_category": 2218
},
{
"id": "MLM1072",
"name": "Perros",
"total_items_in_this_category": 0
},
{
"id": "MLM1111",
"name": "Reptiles y Anfibios",
"total_items_in_this_category": 420
},
{
"id": "MLM1105",
"name": "Roedores",
"total_items_in_this_category": 8093
},
{
"id": "MLM1126",
"name": "Otros",
"total_items_in_this_category": 2736
}
],
"attribute_types": "attributes",
"settings": {
"adult_content": false,
"buying_allowed": true,
"buying_modes": [
"buy_it_now",
"auction"],
"catalog_domain": null,
"coverage_areas": "not_allowed",
"currencies": [
"USD",
"MXN"],
"fragile": false,
"immediate_payment": "required",
"item_conditions": [
"new",
"used",
"not_specified"],
"items_reviews_allowed": false,
"listing_allowed": false,
"max_description_length": 50000,
"max_pictures_per_item": 12,
"max_pictures_per_item_var": 10,
"max_sub_title_length": 70,
"max_title_length": 60,
"maximum_price": null,
"minimum_price": null,
"mirror_category": null,
"mirror_master_category": null,
"mirror_slave_categories": [
],
"price": "required",
"reservation_allowed": "not_allowed",
"restrictions": [
],
"rounded_address": false,
"seller_contact": "not_allowed",
"shipping_modes": [
"custom",
"not_specified",
"me1"],
"shipping_options": [
"custom",
"carrier"],
"shipping_profile": "optional",
"show_contact_information": false,
"simple_shipping": "optional",
"stock": "required",
"sub_vertical": null,
"subscribable": false,
"tags": [
],
"vertical": null,
"vip_subdomain": "articulo"},
"meta_categ_id": null,
"attributable": false
}
}

или в удобочитаемом виде:

{
"MLMXXXXX":{...},
"MLMXXXY":{...},
...
}

Тем не менее, когда я вызываю эту функцию, она застревает после сохранения 3552. Я также читал, что GeoJsonListener загружает JSON в память. Мой вопрос заключается в том, как мне создать Listener, который загружает каждый объект отдельно, а не загружает весь JSON в память.

Заранее спасибо!

Вот результат 3552-го пункта:

{ id: 'MLM45922',
name: 'Mitsubishi',
picture: null,
permalink: null,
total_items_in_this_category: 39,
path_from_root:
[ { id: 'MLM1747', name: 'Accesorios para Vehículos' },
{ id: 'MLM179617', name: 'Tuning y Performance' },
{ id: 'MLM179724', name: 'Performance' },
{ id: 'MLM4859', name: 'Filtros Alto Flujo' },
{ id: 'MLM45922', name: 'Mitsubishi' } ],
children_categories: [],
attribute_types: 'attributes',
settings:
{ adult_content: false,
buying_allowed: true,
buying_modes: [ 'buy_it_now', 'auction' ],
catalog_domain: null,
coverage_areas: 'not_allowed',
currencies: [ 'USD', 'MXN' ],
fragile: false,
immediate_payment: 'required',
item_conditions: [ 'used', 'not_specified', 'new' ],
items_reviews_allowed: false,
listing_allowed: true,
max_description_length: 50000,
max_pictures_per_item: 12,
max_pictures_per_item_var: 10,
max_sub_title_length: 70,
max_title_length: 60,
maximum_price: null,
minimum_price: null,
mirror_category: null,
mirror_master_category: null,
mirror_slave_categories: [],
price: 'required',
reservation_allowed: 'not_allowed',
restrictions: [],
rounded_address: false,
seller_contact: 'not_allowed',
shipping_modes: [ 'not_specified', 'custom', 'me1', 'me2' ],
shipping_options: [ 'custom', 'carrier' ],
shipping_profile: 'optional',
show_contact_information: false,
simple_shipping: 'optional',
stock: 'required',
sub_vertical: null,
subscribable: false,
tags: [],
vertical: null,
vip_subdomain: 'articulo' },
meta_categ_id: null,
attributable: false }

0

Решение

GeoJSONListener делает именно то, что вы хотите сделать — он сохраняет второй уровень объектов в памяти. Таким образом, он загружает каждый объект MLM в ваш файл сам по себе — он не загружает весь файл в память.

Тестируя код, включенный в файл, на который вы ссылаетесь (и уменьшая лимит памяти до 32 МБ, поскольку потоковому анализатору не требуется 4 ГБ памяти), он анализирует весь файл, просматривая 27200 объектов, прежде чем я отменил процесс, примерно за 10 минут на старом Macbook.

Это заставляет меня поверить, что проблема не связана с вашим анализатором JSON или тем, как вы анализируете файл, и может быть вызвана чем-то другим (например, ваш хост / веб-сервер не учитывает вызовы set_time_limit или ваша база данных блокирует или блокирует некоторый контент.

0

Другие решения

Других решений пока нет …

По вопросам рекламы [email protected]