How to import specific parts of the HTML file to WDS? - ibm-watson

I want to import the following part (a) of the following HTML files (b) to WDS.
(a) <meta content="https://qiita.com/xxx/yyy/zzz" property="og:url" />
I created the following WDS configuration file (c) with the following reference, applied it to the environment, and imported the following HTML files (b).
https://console.bluemix.net/docs/services/discovery/custom-config.html#keep_content
But I could not find the part (a), either in the result of "View data schema" or in the result of the "Test your configuration on a document" API (see below).
https://www.ibm.com/watson/developercloud/discovery/api/v1/curl.html?curl#test-your-configuration-on-a-document-api
So I have 3 following questions (1)(2)(3).
(1) Would you tell me the correct way to make the configuration file if
the following one (c) is wrong?
(2) Where does the part (a) appear in the result of "View data
schema" if the following configuration file (c) is correct? (Is it added
as part of extracted_metadata? Is the parameter name og:url?)
(3) Is the part (a) imported for each divided document if the
attached configuration file (c) is correct?
(b) HTML Files:
https://qiita.com/ishida330/items/5afa235ac6a0461f7e5a
https://qiita.com/ishida330/items/f7cc6d2ea0439fa67b10
https://qiita.com/ishida330/items/b823d7c5b55806f04242
https://qiita.com/sukusuku_watson_editorial_room/items/981a4383f9defc5db6f5
https://qiita.com/sukusuku_watson_editorial_room/items/d8407c938af47bc5ac44
(c) WDS Configuration file:
{
"configuration_id": "cbcec10a-f241-4fb5-a86d-15e1e732495d",
"name": "HTML_conf_0914_2",
"description": null,
"created": "2018-08-03T00:08:52.320Z",
"updated": "2018-08-13T01:42:20.763Z",
"conversions": {
"pdf": {
"heading": {
"fonts": [
{
"level": 1,
"min_size": 24,
"max_size": 80
},
{
"level": 2,
"min_size": 18,
"max_size": 24,
"bold": false,
"italic": false
},
{
"level": 2,
"min_size": 18,
"max_size": 24,
"bold": true
},
{
"level": 3,
"min_size": 13,
"max_size": 18,
"bold": false,
"italic": false
},
{
"level": 3,
"min_size": 13,
"max_size": 18,
"bold": true
},
{
"level": 4,
"min_size": 11,
"max_size": 13,
"bold": false,
"italic": false
}
]
}
},
"word": {
"heading": {
"fonts": [
{
"level": 1,
"min_size": 24,
"bold": false,
"italic": false
},
{
"level": 2,
"min_size": 18,
"max_size": 23,
"bold": true,
"italic": false
},
{
"level": 3,
"min_size": 14,
"max_size": 17,
"bold": false,
"italic": false
},
{
"level": 4,
"min_size": 13,
"max_size": 13,
"bold": true,
"italic": false
}
],
"styles": [
{
"level": 1,
"names": [
"pullout heading",
"pulloutheading",
"header"
]
},
{
"level": 2,
"names": [
"subtitle"
]
}
]
}
},
"html": {
"exclude_tags_completely": [
"script",
"sup"
],
"exclude_tags_keep_content": [
"font",
"em",
"span"
],
"exclude_content": {
"xpaths": [
"//meta[#name]",
"//meta[#property!='og:url']"
]
},
"keep_content": {
"xpaths": [
]
},
"exclude_tag_attributes": [
"EVENT_ACTIONS"
]
},
"json_normalizations": [],
"segment": {
"enabled": true,
"selector_tags": [
"h1",
"h2",
"h3"
]
}
},
"enrichments": [
{
"enrichment": "natural_language_understanding",
"source_field": "text",
"destination_field": "enriched_text",
"options": {
"features": {
"keywords": {},
"entities": {
"sentiment": true,
"emotion": false,
"limit": 50
},
"sentiment": {
"document": true
},
"categories": {},
"relations": {},
"concepts": {
"limit": 8
},
"semantic_roles": {}
}
}
}
],
"normalizations": []
}

As of now, the Watson Discovery service extracts only the following three metadata fields from the HTML <head> section: publicationdate, author and title.
They are expected to be in your HTML file as in the following example:
<html>
<head>
<meta name="author" content="Lulu">
<meta name="publicationdate" content="2015-12-04">
<title>Title of the document</title>
</head>
<body>
content of the document
</body>
</html>
Once those three fields are extracted during the ingestion process, they can be queried under the extracted_metadata section. The following shows an example of where in the query result those fields would be found:
{
"extracted_metadata": {
"publicationdate": "2015-12-04",
"title": "Title of the document",
"author": "Lulu",
"filename": "example.html",
"file_type": "html",
"sha1": "256f2c4161a1b13528513a3d4abdf00b6ac80054"
},
"html": "<?xml version='1.0' encoding='UTF-8' standalone='yes'?><html> ...",
"text": "content of the document",
}
Unfortunately, extracting other types of metadata fields from the HTML <head> section is currently not supported.
There is an alternative way though to ingest custom fields by passing in a metadata part in your POST request. Using curl, you could do it by running a command of the form:
curl -u ${WDS_USERNAME}:${WDS_PASSWORD} \
-F "file=@YOUR_FILE.html" \
-F "metadata=@YOUR_METADATA.json" \
-X POST "https://gateway.watsonplatform.net/discovery/api/v1/environments/{environment_id}/collections/{collection_id}/documents?version=2018-03-05"
See the metadata parameter in the documentation at: https://www.ibm.com/watson/developercloud/discovery/api/v1/curl.html?curl#add-document

Related

How to scroll to a particular section of the data fetched from an API while filtering the data on some parameter?

I am calling an API from a server, and I am getting this as a response. A part of the data I am getting -
{
"id": 322854,
"date": "2022-09-20T18:00:00+00:00",
"time": "18:00",
"timestamp": 1663696800,
"timezone": "UTC",
"stage": null,
"week": null,
"status": {
"long": "Game Finished",
"short": "FT",
"timer": null
},
"league": {
"id": 9,
"name": "French Cup",
"type": "cup",
"season": 2022,
"logo": "https://media-2.api-sports.io/basketball/leagues/9.png"
},
"country": {
"id": 4,
"name": "France",
"code": "FR",
"flag": "https://media-2.api-sports.io/flags/fr.svg"
},
"teams": {
"home": {
"id": 22,
"name": "Boulazac",
"logo": "https://media-2.api-sports.io/basketball/teams/22.png"
},
"away": {
"id": 2294,
"name": "CEP Lorient",
"logo": "https://media-2.api-sports.io/basketball/teams/2294.png"
}
},
"scores": {
"home": {
"quarter_1": 16,
"quarter_2": 12,
"quarter_3": 21,
"quarter_4": 26,
"over_time": null,
"total": 75
},
"away": {
"quarter_1": 9,
"quarter_2": 16,
"quarter_3": 19,
"quarter_4": 23,
"over_time": null,
"total": 67
}
}
}
{
"id": 322854,
"date": "2022-09-20T18:00:00+00:00",
"time": "18:00",
"timestamp": 1663696800,
"timezone": "UTC",
"stage": null,
"week": null,
"status": {
"long": "Game Not Started",
"short": "NS",
"timer": null
},
"league": {
"id": 9,
"name": "French Cup",
"type": "cup",
"season": 2022,
"logo": "https://media-2.api-sports.io/basketball/leagues/9.png"
},
"country": {
"id": 4,
"name": "France",
"code": "FR",
"flag": "https://media-2.api-sports.io/flags/fr.svg"
},
"teams": {
"home": {
"id": 22,
"name": "Boulazac",
"logo": "https://media-2.api-sports.io/basketball/teams/22.png"
},
"away": {
"id": 2294,
"name": "CEP Lorient",
"logo": "https://media-2.api-sports.io/basketball/teams/2294.png"
}
},
"scores": {
"home": {
"quarter_1": 16,
"quarter_2": 12,
"quarter_3": 21,
"quarter_4": 26,
"over_time": null,
"total": 75
},
"away": {
"quarter_1": 9,
"quarter_2": 16,
"quarter_3": 19,
"quarter_4": 23,
"over_time": null,
"total": 67
}
}
}
There are a bunch of such objects in an array, each representing a different fixture. As you can see, status?.short === "NS" in one while status?.short === "FT" in the other. Now, I want to filter the data using these properties.
<div className="fixturesTab-container">
<div className="fixturesTab-container__header">
<h4>Fixtures</h4>
</div>
<div className="fixturesTab-container__body">
{fixtures.map((fixture) => (
<div
key={fixture?.id}
className="fixturesTab-container__body__box"
>
......
</div>
))}
</div>
</div>
I want the data to be displayed in such a manner that when the user browses to that page and the API data has been fetched, even though the entire data set (fixtures) is rendered on the page, the fixtures that have not yet started, i.e. those with status?.short === "NS", are shown in the part of the window that is visible to the user. The rest of the data can be reached either by scrolling up, which shows the fixtures that have been completed, or by scrolling down, which shows further fixtures that have not yet started.
If I have been unable to explain what I want properly, you can check out this page. This is exactly what I am planning to achieve.

How can I export a nested array of API data into an Excel file in React?

I am working on a React e-commerce website project and I'm trying to convert a nested array of API data into an Excel file, but I'm facing an issue. I'm able to get the data into an Excel file in React.js, but the nested arrays of data are not showing in the Excel file. Can anyone please help me fix it?
// I'm using this code to convert the data into an Excel sheet
/**
 * Build a one-sheet workbook (sheet name "data") from the given rows and hand
 * it to FileSaver.saveAs as "Product_List.xlsx".
 *
 * @param {Array<Object>} datas - Rows to export; defaults to `this.state.getlist`.
 *
 * NOTE(review): the rows are passed to `XLSX.utils.json_to_sheet` unchanged, so
 * nested objects/arrays inside a row presumably have to be flattened by the
 * caller before they can appear as columns — confirm against the SheetJS docs.
 */
productdownloadFile = (datas = this.state.getlist) => {
  // The Blob `type` option expects a MIME type; the bare string "xlsx" is not one.
  const fileType =
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
  const ws = XLSX.utils.json_to_sheet(datas);
  const wb = { Sheets: { data: ws }, SheetNames: ["data"] };
  // type "array" makes XLSX.write return binary data suitable for a Blob part.
  const excelBuffer = XLSX.write(wb, { bookType: "xlsx", type: "array" });
  const data = new Blob([excelBuffer], { type: fileType });
  FileSaver.saveAs(data, "Product_List" + ".xlsx");
};
// my API data is
{
"success": true,
"product": [
{
"id": 1,
"name": "laptop",
"description": "this is awesome product,you should byu it",
"photo": "https://buluckartphoto.s3.ap-south-1.amazonaws.com/download%20%283%29.jpeg",
"lableType": "Veg",
"status": "0",
"isTex": "1",
"GSTrate": 0.23,
"GSTtyp": "inclusive",
"HSNcode": "hsn525fgf",
"videoUpload": "https://buluckartphoto.s3.ap-south-1.amazonaws.com/download%20%283%29.jpeg",
"categoryId": 1,
"subCategoryId": 1,
"settingId": 1,
"createdAt": "2022-05-20T13:26:03.000Z",
"updatedAt": "2022-05-20T13:26:03.000Z",
"featureSetting": {
"recomendedProduct": true,
"deliverySlotStatus": true,
"loyaltyProgram": true,
"COD": true,
"showCOD": false,
"reccemendProductNo": 10
},
"productphotos": [],
"varientModels": [
{
"id": 2,
"sort": 1,
"sku": "htc",
"waightunitno": "250 gram",
"unit": "gram",
"mrp": 1450,
"discount": 450,
"price": 1000,
"stock": 5,
"minstock": 1,
"outofstock": "continew sell after thersold"
}
],
"reccomendProducts": [],
"category": {
"id": 1,
"name": "vegetable",
"photo": "https://buluckartphoto.s3.ap-south-1.amazonaws.com/images.jpeg"
},
"SubCategory": {
"id": 1,
"sub_name": "gold star2",
"photo": "https://buluckartphoto.s3.ap-south-1.amazonaws.com/download.jpeg"
}
}
]
}

Filter an array of obj based on another array value

I have an array of object like this :
[
{
"url": "https://recipes-gatsby-react.netlify.app/",
"stack": [
{
"id": 1,
"title": "GatsbyJs"
},
{
"id": 2,
"title": "React"
},
{
"id": 3,
"title": "GraphQl"
}
],
"id": "Project_1",
"featured": false,
"title": "Gatsby recipes app"
},
{
"url": "https://resort-react-mr.netlify.app",
"stack": [
{
"id": 4,
"title": "React"
},
{
"id": 5,
"title": "Contentful"
}
],
"id": "Project_2",
"featured": false,
"title": "react resort"
},
{
"url": "https://color-generator-react-mr.netlify.app/",
"stack": [
{
"id": 6,
"title": "React"
}
],
"id": "Project_3",
"featured": false,
"title": "color generator"
},
{
"url": "",
"stack": [
{
"id": 7,
"title": "React Native"
},
{
"id": 8,
"title": "Firebase"
}
],
"id": "Project_4",
"featured": false,
"title": "Book-Worm"
},
{
"url": "",
"stack": [
{
"id": 9,
"title": "React Native"
},
{
"id": 10,
"title": "Firebase"
},
{
"id": 11,
"title": "Stripe"
},
{
"id": 12,
"title": "Google Api"
}
],
"id": "Project_5",
"featured": false,
"title": "Delivery App"
},
{
"url": "https://cocktails-react-app-mr.netlify.app/",
"stack": [
{
"id": 13,
"title": "React"
}
],
"id": "Project_6",
"featured": false,
"title": "Cocktails App"
},
{
"url": "https://www.ideacasaitalia.it/index.php",
"stack": [
{
"id": 14,
"title": "Prestashop"
}
],
"id": "Project_7",
"featured": false,
"title": "Idea Casa Italia"
}
]
and I have another (simple) array like this one for example
["react","prestashop"]
I would like to filter the first array based on STACK key on the value of the second array, i.e. I would like to be returned all elements that have at least one element of the array (variable).
I have come to this
// Keep the projects whose stack contains at least one title that includes
// `value` (the title is lower-cased first), then store them via setProjRel.
// NOTE: when `value` is an array, as in the call below, String#includes
// coerces it to one string ("react,prestashop"), so the entries are not
// tested one by one.
const filter = value => {
  const newProj = relProjects.filter(obj =>
    obj.stack.some(st => st.title.toLowerCase().includes(value))
  )
  setProjRel(newProj)
}
// filter has no return statement, so this logs undefined.
console.log(filter(["react","prestashop"]))
However, in this way I can only get the elements that contain all the elements of the array. Instead, I would like to have returned all the elements that have AT LEAST 1 stack of the ones I passed. Thanks to whoever answers me.
Using Array#filter and Array#includes
Filter through the data array. With the predicate being that the stack array has at least one item that is included in the matches array
// Sample projects; each one carries a `stack` array of { id, title } entries.
const data = [{"url":"https://recipes-gatsby-react.netlify.app/","stack":[{"id":1,"title":"GatsbyJs"},{"id":2,"title":"React"},{"id":3,"title":"GraphQl"}],"id":"Project_1","featured":false,"title":"Gatsby recipes app"},{"url":"https://resort-react-mr.netlify.app","stack":[{"id":4,"title":"React"},{"id":5,"title":"Contentful"}],"id":"Project_2","featured":false,"title":"react resort"},{"url":"https://color-generator-react-mr.netlify.app/","stack":[{"id":6,"title":"React"}],"id":"Project_3","featured":false,"title":"color generator"},{"url":"","stack":[{"id":7,"title":"React Native"},{"id":8,"title":"Firebase"}],"id":"Project_4","featured":false,"title":"Book-Worm"},{"url":"","stack":[{"id":9,"title":"React Native"},{"id":10,"title":"Firebase"},{"id":11,"title":"Stripe"},{"id":12,"title":"Google Api"}],"id":"Project_5","featured":false,"title":"Delivery App"},{"url":"https://cocktails-react-app-mr.netlify.app/","stack":[{"id":13,"title":"React"}],"id":"Project_6","featured":false,"title":"Cocktails App"},{"url":"https://www.ideacasaitalia.it/index.php","stack":[{"id":14,"title":"Prestashop"}],"id":"Project_7","featured":false,"title":"Idea Casa Italia"}];
// Lower-case stack titles to look for.
const matches = ["react", "prestashop"];
// Keep a project when at least one of its stack titles, lower-cased, is listed
// in `matches`. Array#some stops at the first hit instead of building a
// throwaway array only to read its length.
const filtered = data.filter(({ stack }) =>
  stack.some(({ title }) => matches.includes(title.toLowerCase()))
);
console.log(filtered);

Angular - get data from Object Array

Good morning, can anyone advise me? I'm pretty clueless. I've already spent a few hours on it and I don't know how to solve this.
The data is fictitious and the original JSON is far more complex.
JSON
{
"main": [
[
{
"type": "dasdasdasd",
"id": 5,
"content": {
"title": "adadadsad",
"items": [
{
"date": "2012-02-02T11:23:00Z",
"id": 12,
"name": "test",
"isEnabled": false,
"isHighlited": false,
"images": {
"lists": {
"small": [
{
"id": 18,
"position": 0,
"titleImage": true,
"url": "",
"thumbnailReady": true
}
],
"original": [
{
"id": 19,
"position": 0,
"titleImage": true,
"url": "",
"thumbnailReady": true
}
],
"large": [
{
"id": 22,
"position": 0,
"titleImage": true,
"url": "",
"thumbnailReady": true
}
],
"medium": [
{
"id": 23,
"position": 0,
"titleImage": true,
"url": "",
"thumbnailReady": true
}
]
}
},
"enum": "LINIE",
"url": "https://test.com"
}
]
}
}
]
],
"second": [
]
}
How can I get from main -> content -> items -> images -> url?
More precisely, I need each image and, from it, its url.
Thank you very much in advance for your help.
It looks like your main array has an array as 1st element. If that is correct, you take the first element of main, and then loop over that.
// Sample payload: `main` is an array whose first element is itself an array of blocks.
let jsonObj = {
  "main": [
    [
      {
        "type": "dasdasdasd",
        "id": 5,
        "content": {
          "title": "adadadsad",
          "items": [
            {
              "date": "2012-02-02T11:23:00Z",
              "id": 12,
              "name": "test",
              "isEnabled": false,
              "isHighlited": false,
              "images": {
                "lists": {
                  "small": [
                    {
                      "id": 18,
                      "position": 0,
                      "titleImage": true,
                      "url": "",
                      "thumbnailReady": true
                    }
                  ],
                  "original": [
                    {
                      "id": 19,
                      "position": 0,
                      "titleImage": true,
                      "url": "",
                      "thumbnailReady": true
                    }
                  ],
                  "large": [
                    {
                      "id": 22,
                      "position": 0,
                      "titleImage": true,
                      "url": "",
                      "thumbnailReady": true
                    }
                  ],
                  "medium": [
                    {
                      "id": 23,
                      "position": 0,
                      "titleImage": true,
                      "url": "",
                      "thumbnailReady": true
                    }
                  ]
                }
              },
              "enum": "LINIE",
              "url": "https://test.com"
            }
          ]
        }
      }
    ]
  ],
  "second": [
  ]
}
// One array of urls per size variant. Values are appended rather than
// assigned, so urls from earlier items are not overwritten and a list with
// zero or several images is handled without throwing.
let smallImg = [], originalImg = [], largeImg = [], mediumImg = [];
(jsonObj.main[0] || []).forEach(mainItem => {
  mainItem.content.items.forEach(item => {
    const lists = item.images.lists;
    (lists.small || []).forEach(image => smallImg.push(image.url));
    (lists.original || []).forEach(image => originalImg.push(image.url));
    (lists.large || []).forEach(image => largeImg.push(image.url));
    (lists.medium || []).forEach(image => mediumImg.push(image.url));
  });
});
console.log(smallImg);
console.log(originalImg);
console.log(largeImg);
console.log(mediumImg);

md-contact-chips with complex data object

I have a complex object like the one attached below. I want to implement tag functionality like on Stack Overflow using angular material-contact-chip (codepen link). Can someone help me get this done?
{
"statusReason": "OK",
"responseHeaders": {
"Transfer-Encoding": "chunked",
"Server": "Apache-Coyote\/1.1",
"Date": "Sat, 12 Dec 2015 15:22:00 GMT",
"Content-Type": "application\/json"
},
"isSuccessful": true,
"responseTime": 4,
"totalTime": 6,
"rows": [{
"doc": {
"over16": true,
"address3": "Waterside",
"familyName": "tlor",
"ldapId": 2,
"acceptedTermsAndConditions": true,
"id": 2,
"email": "kevin#gmail.com",
"status": {
"name": "Active",
"id": 2
}
}
}, {
"doc": {
"over16": true,
"address3": "Waterside",
"familyName": "sarha",
"ldapId": 2,
"acceptedTermsAndConditions": true,
"id": 2,
"email": "ain#gmail.com",
"status":{
"name": "Active",
"id": 2
}
}
}],
"statusCode": 200
}

Resources