feat: xml

- add new feature: xml parser

Signed-off-by: Pakin <pakin.t@forth.co.th>
This commit is contained in:
Pakin 2026-04-17 11:05:17 +07:00
parent 6219459e3e
commit 21984bdfba
7 changed files with 409 additions and 6 deletions

19
Cargo.lock generated
View file

@ -341,9 +341,9 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.16.1"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51"
[[package]]
name = "hmac"
@ -482,9 +482,9 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.13.0"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
dependencies = [
"equivalent",
"hashbrown",
@ -583,7 +583,9 @@ dependencies = [
"chrono",
"flate2",
"git2",
"indexmap",
"log",
"quick-xml",
"rand",
"rayon",
"serde",
@ -758,6 +760,15 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "quick-xml"
version = "0.39.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "958f21e8e7ceb5a1aa7fa87fab28e7c75976e0bfe7e23ff069e0a260f894067d"
dependencies = [
"memchr",
]
[[package]]
name = "quote"
version = "1.0.43"

View file

@ -7,7 +7,9 @@ edition = "2024"
chrono = "0.4.41"
flate2 = "1.1.2"
git2 = "0.20.2"
indexmap = "2.14.0"
log = "0.4.27"
quick-xml = "0.39.2"
rand = "0.9.2"
rayon = "1.10.0"
serde = { version = "1.0.219", features = ["derive", "serde_derive"] }

225
README.md
View file

@ -26,6 +26,7 @@ let recipe_dir = cfg.get("RECIPE_DIR").unwrap();
---
### Get recipe from specific country (latest)
```rust
...
let latest_versions = grep_latest_versions(recipe_dir).unwrap();
@ -69,7 +70,6 @@ import::generate_recipe_sheet_table("mys", 626);
### Notes
Simple Snippet Patterns
```rust
@ -87,4 +87,225 @@ let pure_mat_id = if curr_rpl_mat_id > 300000 {
} else {
curr_rpl_mat_id
};
```
```
---
### XML Parser (Experimental)
Parses an XML file into a node tree of type `Vec<Node>`, where `Node` is defined in `crate::xml::node`.
Xml-related functions
- `parse_xml_to_tree`: parses a raw XML string into a node tree (`Vec<Node>`).
- `print_tree`: prints a node tree to stdout as indented XML.
- `generate_nodes_from_xml`: reads each file in a list of `(catalog_name, catalog_path)` pairs and returns an `IndexMap` from catalog name to that file's parsed nodes.
Node functions
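- `find_by_child_value`: searches the current node and all of its descendants, returning every node named `parent_name` that has a direct child named `child_name` whose value contains `target_value`.
- `get_child`: returns the first direct child of the current node with the given name, if any.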
Shortcut Macro
- `get_path`: macro for reaching a nested child by a dotted path of element names (`key1.key2.key3...`); returns `Option<&Node>`.
Example: parsing the XML layout v3 files and generating output in the `new-layout-v2` format
```rust
use indexmap::IndexMap;
use libtbr::get_path;
use libtbr::xml::node::*;
// ...
let taobin_dir = cfg
.get("TAOBIN_REPO")
.expect("Taobin directory path not provided");
let ltu_v3_dir = format!("{taobin_dir}/inter/ltu/xml/multi/v3");
// pre-defined paths configuration in format (catalog_name, catalog_path)
let v3_catalogs = vec![
(
"recommend",
format!("{ltu_v3_dir}/event/event_v3/active_promotions.lxml"),
),
(
"coffee",
format!("{ltu_v3_dir}/page_catalog_group_coffee.lxml"),
),
("milk", format!("{ltu_v3_dir}/page_catalog_group_milk.lxml")),
("tea", format!("{ltu_v3_dir}/page_catalog_group_tea.lxml")),
(
"health",
format!("{ltu_v3_dir}/page_catalog_group_health.lxml"),
),
(
"other",
format!("{ltu_v3_dir}/page_catalog_group_other.lxml"),
),
];
// input must be `Vec<(&str, String)>`
let mut v3_catalog_nodes: IndexMap<String, Vec<Node>> =
generate_nodes_from_xml(v3_catalogs)?;
for (_, (catalog_name, catalog_nodes)) in v3_catalog_nodes.iter().enumerate() {
if catalog_nodes.len() == 1
&& let Some(root_node) = catalog_nodes.first()
{
// get_path is a macro for accessing the node inner child
//
// usage: get_path!(root_node, key1.key2.key3...);
//
let current_menus_result: Option<&Node> = get_path!(root_node, ScrollableCatalog.Menus);
if let Some(current_menus) = current_menus_result {
println!(
"Name={},file=page_catalog_group_{}.skt",
catalog_name, catalog_name
);
let ccm = current_menus.clone();
for menu_block in ccm.children.clone() {
let mut name_row = String::from("\tname\t");
let mut desc_row = String::from("\tdesc\t");
let mut img_row = String::from("\timg\t");
let tag_filter_option = get_path!(menu_block, TagFilter);
let idle_image_tag = match get_path!(menu_block, IdleImage) {
Some(img_path) => {
let img_path = img_path.clone().value.unwrap_or("".to_string());
let img_path_spl: Vec<String> = img_path
.trim()
.replace("\"", "")
.split("/")
.map(|x| x.to_string())
.collect();
img_path_spl
.last()
.unwrap()
.replace("[amp]", "&")
.to_string()
}
None => "".to_string(),
};
img_row.push_str(format!("{idle_image_tag}\t-\t-\t-\t\t\t||||||||||||||||||||||||||\t||||||||||||||||||||||||||\t||||||||||||||||||||||||||\t\t\t\t\t\t\t\t-\t-\t-\t-\t-\t").as_str());
let hot_state_val = match get_path!(menu_block, HotState) {
Some(state) => state
.clone()
.value
.and_then(|x| {
if x.to_string().contains("Disable2") {
return Some("-".to_string());
} else {
return Some(x.replace("$", "").replace(".Button", ""));
}
})
.unwrap()
.trim()
.to_string(),
None => "-".to_string(),
};
let ice_state_val = match get_path!(menu_block, IceState) {
Some(state) => state
.clone()
.value
.and_then(|x| {
if x.to_string().contains("Disable2") {
return Some("-".to_string());
} else {
return Some(x.replace("$", "").replace(".Button", ""));
}
})
.unwrap()
.trim()
.to_string(),
None => "-".to_string(),
};
let blend_state_val = match get_path!(menu_block, BlendState) {
Some(state) => state
.clone()
.value
.and_then(|x| {
if x.to_string().contains("Disable2") {
return Some("-".to_string());
} else {
return Some(x.replace("$", "").replace(".Button", ""));
}
})
.unwrap()
.trim()
.to_string(),
None => "-".to_string(),
};
let names = match get_path!(menu_block, Name.LanguageGroup) {
Some(names) => names.clone(),
None => Node::default(),
};
// Description
let descs = match get_path!(menu_block, Description.LanguageGroup) {
Some(descs) => descs.clone(),
None => Node::default(),
};
for name in names.children.clone() {
if let Some(value) = name.value {
name_row.push_str(format!("{value}\t").replace("[amp]", "&").as_str());
} else {
name_row.push_str(format!("\t").as_str());
}
}
name_row.push_str(
format!(
"{},-\t{},-\t{},-\t\t\t\t\t\t\t\t-\t-\t-\t-\t{}",
hot_state_val,
ice_state_val,
blend_state_val,
tag_filter_option
.clone()
.unwrap_or(&Node::default())
.value
.clone()
.unwrap_or("-".to_string())
.trim()
.replace("\"", "")
)
.as_str(),
);
for desc in descs.children.clone() {
if let Some(value) = desc.value {
desc_row.push_str(format!("{value}\t").replace("[amp]", "&").as_str());
} else {
desc_row.push_str(format!("\t").as_str());
}
}
desc_row.push_str(
format!(
"||||||||||||||||||||||||||\t||||||||||||||||||||||||||\t||||||||||||||||||||||||||\t\t\t\t\t\t\t\t-\t-\t-\t-\t-\t"
)
.as_str(),
);
//||||||||||||||||||||||||||
println!("{name_row}");
println!("{desc_row}");
println!("{img_row}");
println!("");
}
// name Americano อเมริกาโน Amerikano Americano 59-01-01-0003,59-21-01-0003 59-01-02-0001,59-21-02-0001 -,- - - Signature - CoffeeNoMilk,Recommend
// desc Espresso, Water กาแฟ และน้ำ Espresas, vanduo Espresso, Apă |||||||||||||||||||||||||| |||||||||||||||||||||||||| |||||||||||||||||||||||||| - - - - -
// img bn_hot_americano.png - bn_hot_america_no.png bn_hot_america_no.png posi1 |||||||||||||||||||||||||| |||||||||||||||||||||||||| - - - - -
}
}
}
```

View file

@ -2,3 +2,4 @@
pub mod models;
pub mod previews;
pub mod recipe_functions;
pub mod xml;

View file

@ -339,3 +339,12 @@ pub fn grep_latest_versions(dir_path: &str) -> Result<HashMap<String, usize>, st
Ok(vs)
}
/// Reads the file at `path` and returns its contents as one `String` per line.
///
/// Lines are returned as-is (no splitting on tabs is done here); line
/// terminators are removed.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or its contents cannot be
/// read into a `String`.
pub fn read_tsv_file(path: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
    let mut raw = String::new();
    File::open(path)?.read_to_string(&mut raw)?;

    let rows = raw.lines().map(String::from).collect();
    Ok(rows)
}

1
src/xml/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod node;

158
src/xml/node.rs Normal file
View file

@ -0,0 +1,158 @@
use indexmap::IndexMap;
use quick_xml::events::Event;
use std::fs::File;
use std::io::Read;
/// A single XML element in a parsed tree.
///
/// `PartialEq`/`Eq` are derived so that whole trees can be compared
/// structurally (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Node {
    /// Element (tag) name.
    pub name: String,
    /// Child elements, in the order they were closed in the document.
    pub children: Vec<Node>,
    /// Text content of the element, if any text was seen inside it.
    ///
    /// The parser stores the text verbatim (it is not trimmed), and a later
    /// text chunk inside the same element replaces an earlier one.
    pub value: Option<String>,
}
/// Attaches a completed node to the element that is currently open, or to the
/// list of roots when no element is open.
fn attach_node(stack: &mut [Node], roots: &mut Vec<Node>, node: Node) {
    match stack.last_mut() {
        Some(parent) => parent.children.push(node),
        None => roots.push(node),
    }
}

/// Parses an XML string into a forest of [`Node`]s.
///
/// Returns the top-level elements in document order. Only element names and
/// text are captured; attributes are not stored.
///
/// Behavior worth knowing:
/// - Self-closing elements (`<a/>`) become nodes with no children and no value.
/// - Text is stored verbatim on the innermost open element. A later text chunk
///   inside the same element replaces the earlier one, so an element that
///   contains children usually ends up holding the whitespace that precedes
///   its closing tag.
/// - Elements that are still open when the input ends are discarded.
/// - Parse errors are reported on stderr and parsing continues with the next
///   event; this function never returns an error.
pub fn parse_xml_to_tree(xml: &str) -> Vec<Node> {
    let mut reader = quick_xml::Reader::from_str(xml);

    // Elements that have been opened but not yet closed, innermost last.
    let mut stack: Vec<Node> = Vec::new();
    let mut roots: Vec<Node> = Vec::new();
    let mut buf = Vec::new();

    loop {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(e)) => {
                stack.push(Node {
                    name: String::from_utf8_lossy(e.name().as_ref()).into_owned(),
                    children: Vec::new(),
                    value: None,
                });
            }
            // A self-closing element never produces a matching `End` event,
            // so it has to be attached right away or it is lost.
            Ok(Event::Empty(e)) => {
                let node = Node {
                    name: String::from_utf8_lossy(e.name().as_ref()).into_owned(),
                    children: Vec::new(),
                    value: None,
                };
                attach_node(&mut stack, &mut roots, node);
            }
            Ok(Event::End(_)) => {
                if let Some(finished) = stack.pop() {
                    attach_node(&mut stack, &mut roots, finished);
                }
            }
            Ok(Event::Text(e)) => {
                if let Some(current) = stack.last_mut() {
                    // Lossy decoding keeps this path panic-free.
                    let text = String::from_utf8_lossy(&e.into_inner()).into_owned();
                    current.value = Some(text);
                }
            }
            Ok(Event::Eof) => break,
            // Diagnostics go to stderr so they do not mix with data that
            // callers print on stdout.
            Err(e) => eprintln!("error: {e:?}"),
            _ => {}
        }
        buf.clear();
    }

    roots
}
/// Prints `nodes` to stdout as XML, one element per line.
///
/// Each nesting level is indented by one more space than its parent, starting
/// from `depth` spaces. An element with children is printed as an opening
/// tag, its children, and a closing tag (its own text value is not printed);
/// a childless element is printed on a single line with its value, or with an
/// empty body when it has none.
pub fn print_tree(nodes: &[Node], depth: usize) {
    let pad = " ".repeat(depth);

    for node in nodes {
        let tag = &node.name;

        if !node.children.is_empty() {
            println!("{}<{}>", pad, tag);
            print_tree(&node.children, depth + 1);
            println!("{}</{}>", pad, tag);
        } else if let Some(text) = &node.value {
            println!("{}<{}>{}</{}>", pad, tag, text, tag);
        } else {
            println!("{}<{}></{}>", pad, tag, tag);
        }
    }
}
impl Node {
    /// Collects every node named `parent_name` that has a direct child named
    /// `child_name` whose value contains `target_value`.
    ///
    /// The search covers this node and all of its descendants. Matches are
    /// returned as clones in pre-order (a node comes before any matching
    /// descendants). A child without a value never matches.
    pub fn find_by_child_value(
        &self,
        parent_name: &str,
        child_name: &str,
        target_value: &str,
    ) -> Vec<Node> {
        let mut matches = Vec::new();
        self.collect_by_child_value(parent_name, child_name, target_value, &mut matches);
        matches
    }

    /// Recursive worker for [`Node::find_by_child_value`]; appends matches to
    /// `out` so no temporary vector is built per level.
    fn collect_by_child_value(
        &self,
        parent_name: &str,
        child_name: &str,
        target_value: &str,
        out: &mut Vec<Node>,
    ) {
        // Borrow the child's value instead of cloning the whole child subtree
        // just to inspect one field.
        let is_match = self.name == parent_name
            && self.children.iter().any(|c| {
                c.name == child_name
                    && c.value.as_deref().is_some_and(|v| v.contains(target_value))
            });
        if is_match {
            out.push(self.clone());
        }

        for child in &self.children {
            child.collect_by_child_value(parent_name, child_name, target_value, out);
        }
    }

    /// Returns the first direct child named `name`, or `None` if there is no
    /// such child. Grandchildren are not searched.
    pub fn get_child(&self, name: &str) -> Option<&Node> {
        self.children.iter().find(|c| c.name == name)
    }
}
/// Reads and parses every catalog file, keyed by catalog name.
///
/// `catalog_map` is a list of `(catalog_name, catalog_path)` pairs. Each file
/// is read fully, pre-processed, and handed to `parse_xml_to_tree`; the
/// resulting nodes are inserted into the returned map under `catalog_name`.
///
/// Pre-processing, applied line by line:
/// - a line containing `;` is dropped unless it also contains `;include`
///   (presumably `;` marks a comment in these files — confirm against the
///   file format);
/// - every `&` in a kept line is replaced with the literal text `[amp]`.
///
/// # Errors
///
/// Returns an error as soon as a file cannot be opened or read; catalogs
/// later in the list are then not processed.
pub fn generate_nodes_from_xml(
    catalog_map: Vec<(&str, String)>,
) -> Result<IndexMap<String, Vec<Node>>, Box<dyn std::error::Error>> {
    let mut parsed = IndexMap::new();

    for (name, path) in catalog_map {
        let mut raw = String::new();
        File::open(path)?.read_to_string(&mut raw)?;

        // Keep a line when it has no `;` at all, or when it is an `;include`.
        let cleaned: String = raw
            .lines()
            .filter(|line| !line.contains(";") || line.contains(";include"))
            .map(|line| format!("{line}\n").replace("&", "[amp]"))
            .collect();

        parsed.insert(name.to_string(), parse_xml_to_tree(&cleaned));
    }

    Ok(parsed)
}
/// Looks up a nested child node by a dotted path of element names.
///
/// `get_path!(node, A.B.C)` is shorthand for calling `get_child("A")` on
/// `node`, then `get_child("B")` on the result, and so on. It evaluates to
/// `Option<&Node>` and is `None` as soon as any segment is missing.
///
/// The first argument can be a `Node` or a `&Node`; each path segment must be
/// a valid Rust identifier.
///
/// Example:
/// ```ignore
/// let current_menus_result: Option<&Node> = get_path!(root_node, ScrollableCatalog.Menus);
///
/// // Possible results
///
/// //Some(Node { name: "Menus", children: [Node { name: "Menu", children: [Node { name: "State",....
///
/// //None
/// ```
#[macro_export]
macro_rules! get_path {
    // base case: a single segment
    ($node:expr, $last:ident) => {
        $node.get_child(stringify!($last))
    };
    // recursive case: resolve the first segment, then the rest from there.
    // `$crate::` keeps the inner call resolvable even when the caller invoked
    // the macro by path and never imported it.
    ($node:expr, $next:ident . $($rest:ident).+) => {
        $node
            .get_child(stringify!($next))
            .and_then(|child| $crate::get_path!(child, $($rest).+))
    };
}